Steroid Data Analysis

Pauli Tikka

2025-01-29

Introduction

# Welcome to the 'steroid data analysis' webpage! 

# The procedures and explanations to make all the analysis and plots are in their individual chapters below. 
# These methods could be also easily applied to other types of data sets and metabolites than 'steroids' and their respective metadata per se. 
# In addition, there is a small 'disclaimer' also at the end of this webpage to emphasize that this site is mainly for educational purposes.
# Please let me know if you have any questions. For that, use the 'following' email: patati at the university of Turku

Loading Required R Packages

# echo=FALSE is too good
# Set library paths if needed
# .libPaths(c("C:/Program Files/R/R-4.4.1/library", .libPaths()))

# List of libraries to load (alphabetically sorted)
packages <- c("bigsnpr", "binilib", "brickster", "car", "censReg", "circlize", "ComplexHeatmap", "correlation", 
              "corrplot", "daiR", "datarium", "dmetar", "dplyr", "effsize", "extrafont", "forcats", "fs", "FSA", 
              "ggcorrplot", "ggforce", "ggforestplot", "ggplot2", "ggplotify", "ggpubr", "ggsankey", "ggsankeyfier", 
              "ggh4x", "ggtext", "glmnet", "grid", "Hmisc", "hrbrthemes", "igraph", "insight", "lavaan", "lmtest", 
              "lme4", "lsr", "magick", "magrittr", "Maaslin2", "mdatools", "mediation", "meta", "mgcv", "mlma", 
              "MOFA2", "pheatmap", "PerformanceAnalytics", "pathviewr", "plyr", "plotrix", "ppcor", "prettydoc", 
              "psych", "quantreg", "qpgraph", "ragg", "RColorBrewer", "rcompanion", "readxl", "remotes", "reshape2", 
              "rgl", "rmarkdown", "rmdformats", "rstatix", "scales", "scater", "scatterplot3d", "sjPlot", "stringr", 
              "superb", "tibble", "tidyverse", "tint", "tufte", "viridis", "xlsx")

# Load all libraries
invisible(lapply(packages, library, character.only = TRUE))

# Note: Do not load 'forestplot' as it conflicts with 'ggforestplot'

# Install packages if not already installed
# renv::install() # Installs from the basic R repository
# if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install(c("ComplexHeatmap", "DESeq2", "dmetar", "fgsea", "ggforestplot", "ggsankey", "limma", "Maaslin2", "metagenomeSeq", "MOFA2", "qpgraph", "scater", "scRNAseq", "sevenbridges"))
# remotes::install_github(c("davidsjoberg/ggsankey", "fossbert/binilib", "MathiasHarrer/dmetar", "mattflor/chorddiag", "NightingaleHealth/ggforestplot"))
# devtools::install_github("mattflor/chorddiag") # Alternative installation method

Importing Data and Metadata

#First set your data folder:
# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")# or:"C:/Users/patati/Desktop/Turku/R" #check the wd with: here::here() #or getwd()
# load("thereal.RData") #This is so to say real data, and it is not available here (at the site).

# It is easier to load the ready stiched data in one go with .RData file than one by one as below, but
# for educational purposes I put some examples what may need to be done to get your data in ok form 
# for later purposes
setwd("C:/Users/patati/Desktop/Turku/R")
NAFLD=read_excel("NAFLD_SteroidStudy.xlsx",sheet = "LFAT_steroidsDATA") # This is partly auxiliary
oknames=colnames(NAFLD); NAFLD=data.frame(NAFLD)

#The names of the steroid groups need to be imported early on:
groups=read.csv("groups_17823.csv", header = TRUE, sep=";")
groups=groups[,c('Group','Abbreviation')]
groups=groups[groups[,'Abbreviation']!='F',]
groups=groups[order(groups[,'Group']),]
groups[,'Abbreviation'][groups[,'Abbreviation']=='17aOH-P4']='17a-OHP4'
GVZ=groups

ForSteroidNames=read_excel("NAFLD_SteroidStudy_for groups.xlsx",sheet = "Steroid name abbreviations") # This is partly auxiliary
groups2=data.frame(ForSteroidNames)[,1:4]; g1=read.csv("groups_17823.csv", header = TRUE, sep=";")
groups2=cbind(g1[,'Group'], groups2[,c('Abbreviation','Abbreviation_old','Name')])
groups2=groups2[groups2[,'Abbreviation']!='FF',]; colnames(groups2)[1]="Group";
groups2=groups2[order(groups2[,'Group']),]
groups2[,'Abbreviation'][groups2[,'Abbreviation']=='17aOH-P4']='17a-OHP4'

#P4 was found from elsewhere to have the following characteristics:
NAFLD[,'P4'] = as.numeric(NAFLD[,'P4'])
NAFLD[,'P4'][is.na(NAFLD[,'P4'])] = 22557.3330346846#median(NAFLD[,'P4'], na.rm=TRUE) 
NAFLD[,5:7][NAFLD[,5:7]==0.01]=0; colnames(NAFLD)=oknames
MASLD=read_excel("Combined.Matrix.For.Pauli.2023.10.17.Excel.Formatv2.xlsx") # This is the main file
oknames=colnames(MASLD); MASLD=data.frame(MASLD); colnames(MASLD)=oknames   # All kinds of tricks are needed for getting the right data format
rownames(MASLD)=MASLD[,1]
MASLD[,'P4'] = as.numeric(MASLD[,'P4']) #The same comment as above
MASLD[,'P4'][is.na(MASLD[,'P4'])] = 22557.3330346846 
eva=c('Grade(0-3)', 'Stage(0-4)','Necroinflammation')
MASLD[,eva][MASLD[,eva]==0.01]=0; 
td=c('11-KDHT','AN','DHT','17a-OHP5','E2','P5','DOC')
val=c(103,252,51,200,26.5,253,10); vale=c(100,250,50,200,25,250,10)
for (i in 1:7) {MASLD[,td][i][MASLD[,td][i]==val[i]]=vale[i]} 

# These (E) are ok as per lab:
ME=read.csv('E_tikka231023.csv',header=TRUE, sep=";")
ME2=rownames(MASLD[MASLD[,'E']==106000,]) 
to=ME[which(ME[,1] %in% ME2),'patient.number']
te=ME[which(ME[,1] %in% ME2),'E']
MASLD[as.character(to),'E']=te
# These (11-KA4) will perhaps change in the lab (sometime after 24.10.23):
M11=read.csv('11KA4_tikka231023.csv',header=TRUE, sep=";")
# M11[,1][c(1:5,9)];MASLD[as.character(M11[,1][c(1:5,9)]),'11-KA4'] #These were denoted with 'big interference'
MASLD[as.character(M11[,1][c(1:5,9)]),'11-KA4'] = NA #Alternatively: median(MASLD[!rownames(MASLD) %in% as.character(M11[,1][c(1:5,9)]),'11-KA4'])
a=MASLD[order(MASLD[,'BMI']),'BMI']
b=NAFLD[order(NAFLD[,'BMI']),'BMI']
them=unique(b[! b %in% a])
NAFLD=NAFLD[order(NAFLD[,'BMI']),] 
NAFLD=NAFLD[NAFLD[,'BMI']!=them,]
MASLD=MASLD[order(MASLD[,'BMI']),]
#https://appsilon.com/imputation-in-r/ #https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
# New data import withouth changing the conames: https://readxl.tidyverse.org/articles/column-names.html
Bali=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "Liver_BA",.name_repair = "minimal")); row.names(Bali)=Bali[,1]
Pfase=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "PFAS_serum",.name_repair = "minimal")); rownames(Pfase)=as.vector(unlist(Pfase[,1]))
Base=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "Serum_BA",.name_repair = "minimal"));rownames(Base)=as.vector(unlist(Base[,1]))
C4=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "C4",.name_repair = "minimal")); rownames(C4)=as.vector(unlist(C4[,1]))
Clini=data.frame(read_excel("Matching clinical data_all.xlsx",sheet = "Sheet1",.name_repair = "minimal")); rownames(Clini)=as.vector(unlist(Clini[,1]));
#https://www.analyticsvidhya.com/blog/2021/06/hypothesis-testing-parametric-and-non-parametric-tests-in-statistics/
MASLD[1:2,2:27] #or head(MASLD);
##                    E     11-KT 17a-OHP5        S        B  11b-OHA4 11-KDHT
## 24145190413 47616.43 1013.1342 5023.952 754.5581 8580.997 13735.454     100
## 2459090909  44356.73  935.1387 1111.426 569.7927 6167.239  4709.126     100
##               11-KA4      DHEA    T/Epi-T  17aOH-P4       AN      DHT       A4
## 24145190413 1833.115 12456.910   912.9862 1095.8051 250.0000  50.0000 3065.905
## 2459090909  1004.484  5890.108 10091.7241  864.5884 574.9827 471.7821 1683.676
##                  DOC       P5        P4        A       E2       E1      PFHpA
## 24145190413 58.15483 2375.196 270.70746 516.4204 864.3368 493.2497 0.03923419
## 2459090909  53.65145 1583.961  85.07423 752.5202  60.0817 139.6349 0.12359324
##                  PFHxS       PFNA       PFOA       PFOS      PFAS
## 24145190413 0.02715859 0.01086114 0.08592297 0.05282948 0.2160064
## 2459090909  0.07177058 0.04601904 0.24675879 0.47560478 0.9637464
# The below ordering needs to be changed...
Bali=Bali[as.character(MASLD$PatientNumber),];Bali[1:3,2:10] #https://stackoverflow.com/questions/54264980/r-how-to-set-row-names-attribute-as-numeric-from-character I did otherway around
##                    CA     CDCA       DCA      LCA      UDCA      GCA    GCDCA
## 24145190413  6.500727 30.82903 18.416920 80.34275 35.334259  6928.07 27816.43
## 2459090909   7.507067 28.13308 16.789504 76.62429  6.387519 18653.05 21291.73
## 24102261010 13.503284 39.76252  9.959156 89.36230 23.349181 19576.52 16703.54
##                   GDCA    GDHCA
## 24145190413 6324.72401 1.761993
## 2459090909  9430.65918 1.938591
## 24102261010   15.05147 1.777252
Base=Base[as.character(MASLD$PatientNumber),];#Base[1:3,2:10]
Clini=Clini[as.character(MASLD$PatientNumber),];#Clini[1:3,2:10] # Many of these are irrelevant here... so not opening them, they would exhaust this file
C4=C4[as.character(MASLD$PatientNumber),];#C4[1:3,]
Pfase=Pfase[as.character(MASLD$PatientNumber),];Pfase[1:3,2:10]
##             Benzylparaben Methylparaben Perfluorodecyl.ethanoic.acid      PFHpA
## 24145190413     0.7051692    0.06670446                   0.05383560 0.03923419
## 2459090909      0.4099753    0.03094425                   0.04050537 0.12359324
## 24102261010     0.5434552    0.20578393                   0.03777816 0.01848635
##                 PFHxA  PFHxA.1      PFHxS       PFNA       PFOA
## 24145190413 0.3198820 6.445621 0.02715859 0.01086114 0.08592297
## 2459090909  0.2237116 7.254053 0.07177058 0.04601904 0.24675878
## 24102261010 0.1774767 2.979016 0.01765444 0.02409549 0.09258503
# Menopause markers:
menopause=read_excel("Putative_metabolic_markers_menopause.xlsx",sheet='menopause markers',.name_repair = "minimal"); #rownames(Clini)=as.vector(unlist(Clini[,1]));
menopause=menopause[8:dim(menopause)[1],]; menopause=menopause[,-15]; menopause[2,2:14]=menopause[1,2:14]; menopause=data.frame(menopause); menopause[2,13:14]=c('v1','v2'); #dim(menopause)
colnames(menopause)=c('row_names',menopause[2,2:dim(menopause)[2]]); menopause=menopause[3:dim(menopause)[1],];rownames(menopause)=as.vector(unlist(menopause[,1]));
menopause=menopause[as.character(MASLD$PatientNumber),]
colnames(Pfase)[colnames(Pfase)=='PFHxA.1']='PFHxA_Branched'
Pfase=Pfase[,colnames(Pfase)!='Benzylparaben.1']
Pfase[Pfase[,'Benzylparaben']>10,'Benzylparaben']=NA 

Jeihou=data.frame(read_excel("Copy of BA_liverfat_RawData.xls",.name_repair = "minimal")); row.names(Jeihou)=Jeihou[,1];Jeihou=Jeihou[as.character(MASLD$PatientNumber),]
u=Jeihou[Jeihou[,'GHDGA']=='<LLOQ',1]; a=u[!is.na(u)]; b=rownames(Bali[Bali[,'GHDGA']==1,]);
uu=Jeihou[Jeihou[,'GHDGA']=='No Result',1]; aa=uu[!is.na(uu)]; 
Bali[as.character(a),'GHDGA']=min(Bali[,'GHDGA'],na.rm=TRUE)/2
heps=Bali[Bali[,'GHDGA']==1,1] 
Bali[as.character(heps),'GHDGA']=NA
#https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
mat=Bali[,c('TbMCA','ToMCA','TDCA','TDHCA','TLCA')]
mat[!mat>1]=10000
mat[mat==2]=10000 #Colmins did not work so I used (i.e. colmins ei toiminut ja käytin):
hip=do.call(pmin, lapply(1:nrow(mat), function(i)mat[i,])) #https://stackoverflow.com/questions/13676878/fastest-way-to-get-min-from-every-column-in-a-matrix
hou=c('TbMCA','ToMCA','TDCA','TDHCA','TLCA')
for (i in 1:5) {Bali[Bali[,hou[i]]==1,hou[i]]=hip[i]}
for (i in 1:5) {Bali[Bali[,hou[i]]==2,hou[i]]=hip[i]}

# An imputation for the missing values:
C4[is.na(C4[,2]),2]=median(C4[!is.na(C4[,2]),2]) #assuming that these were not below quantitation and replacing with median
#https://www.geeksforgeeks.org/performing-logarithmic-computations-in-r-programming-log-log10-log1p-and-log2-functions/
#https://stackoverflow.com/questions/50476717/i-want-to-align-match-two-unequal-columns

#Matching two unequal columns.. match the names of one original column (dat2) to ones that are missing (dat1 with to other) #Not sure if this should be this difficult...
tv=cbind(MASLD[,1],NAFLD[,2:7],Clini[,'HOMA.IR'],MASLD[,colnames(NAFLD[,8:27])],Bali[,2:dim(Bali)[2]], C4[,2:dim(C4)[2]],Base[,2:dim(Base)[2]],Pfase[,(2:(dim(Pfase)[2]))], MASLD[,'PFAS']);
colnames(tv)[colnames(tv)=='C4[, 2:dim(C4)[2]]']='C4';colnames(tv)[colnames(tv)=='Clini[, \"HOMA.IR\"]']='HOMA-IR'
colnames(tv)[colnames(tv)=='MASLD[, \"PFAS\"]']='PFAS';
colnames(tv)[colnames(tv)=="MASLD[, 1]" ]='PatientNumber';#colnames(tv)#
rownames(tv)=unlist(Bali[,1]); 
hep=colnames(tv)[!colnames(tv) %in% c( "Benzylparaben" ,"Methylparaben")] 

# Not sure when it is the best time to take not needed variables away, perhaps at the very end?
tv=tv[,hep]
# Here I add the lipids. In the future, I need to divide all the groups in their own components e.g. dataframe called 'lipids' so that adding them will be more straightforward:
tv=cbind(tv,MASLD[,(dim(MASLD)[2]-13):dim(MASLD)[2]]) 
# hupo=match(   colnames(tv)[colnames(tv) %in% groups2[,3]], groups2[,3] ) # do ni; https://www.geeksforgeeks.org/how-to-find-index-of-element-in-vector-in-r/
# tvauxe=tv
# colnames(tv)[colnames(tv) %in% groups2[,3]]=groups2[hupo,2]


# The basic preprocessing is just the below lines:
tve=tv[,2:dim(tv)[2]]; tve[tve == 0] <- NA; #Almost all variables are here
tv_half <- tve %>% mutate(replace(., is.na(.), min(., na.rm = T)/2)) #https://mdatools.com/docs/preprocessing--autoscaling.html
tv_half_log2 <- log2(tv_half);
tv_auto <- prep.autoscale(as.matrix(tv_half_log2), center = TRUE, scale = TRUE);  #https://svkucheryavski.gitbooks.io/mdatools/content/preprocessing/text.html
tv_all=cbind(tv[,1],tv_auto); 

# Changing the column names needs to have separate variables for each type of variable (contaminant, steroid, etc.)
x1=colnames(tv_all[,c(1:8)]); v2=dim(NAFLD)[2]+1
x2=colnames(tv_all[,9:v2]);v3=(dim(Bali)[2]+v2);x3=colnames(tv_all[,(v2+1):(v3)]);v4=(dim(Base)[2])+v3
x4=colnames(tv_all[,(v3+1):(v4-1)]);x5=colnames(tv_all[,(v4):(dim(tv_all)[2])]); 
x3 <- paste(x3, "_L", sep="") #https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
x4=gsub("(-[0-9]*)*.1", "", x4) #https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
x4 <- paste(x4, "_S", sep="")# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
x5a=x5[1:9]
x6=x5[10:length(x5)] #Dividing to lipids
x5=x5a  #Making sure that PFAS are separate
nm = c(x1,x2,x3,x4,x5,x6); nm=c('PatientNumber','Gender','AGE','BMI','Steatosis Grade','Fibrosis Stage','Necroinflammation','HOMA-IR',nm[9:length(nm)])
colnames(tv_all)=nm; #tv_all[1:5,1:30]; #NAFLD[1:2,1:28];
colnames(tv_all)[colnames(tv_all)=='MASLD[, \"PFAS\"]']='PFAS';
# This (deletion) is good to do after all the previous:
x5=x5[x5!='PFAS'];x5=x5[x5!='Perfluorodecyl.ethanoic.acid']; x6=x6[x6!='Total_TG'] # x1;x2;x3;x4;x5;
tv_all=tv_all[,!colnames(tv_all) %in% c('Total_TG','PFAS',"Perfluorodecyl.ethanoic.acid")]

# In case you would need just the logged values:
tv_half_log22=cbind(tv[,1],tv_half_log2);
x1=colnames(tv_half_log22[,c(1:8)]); v2=dim(NAFLD)[2]+1
x2=colnames(tv_half_log22[,9:v2]);v3=(dim(Bali)[2]+v2);
x3=colnames(tv_half_log22[,(v2+1):(v3)]);v4=(dim(Base)[2])+v3
x3=x3[c(length(x3),1:(length(x3)-1))]
x4=colnames(tv_half_log22[,(v3+1):(v4-1)]);
x5=colnames(tv_half_log22[,(v4):(dim(tv_half_log22)[2])]);
x3 <- paste(x3, "_L", sep="") 
#https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
x4=gsub("(-[0-9]*)*.1", "", x4) #https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
x4 <- paste(x4, "_S", sep="")# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
x5a=x5[1:9]
x6=x5[10:length(x5)] #dividing to lipids
x5=x5a  #making sure that PFAS are separate
nm = c(x1,x2,x3,x4,x5,x6); nm=c('PatientNumber','Gender','AGE','BMI','Steatosis Grade','Fibrosis Stage','Necroinflammation','HOMA-IR',nm[9:length(nm)])
colnames(tv_half_log22)=nm; #tv_half_log22[1:5,1:30]; #NAFLD[1:2,1:28];
colnames(tv_half_log22)[colnames(tv_half_log22)=='MASLD[, \"PFAS\"]']='PFAS';
# This (deletion) is good to do after all the previous:
x5=x5[x5!='PFAS'];x5=x5[x5!='Perfluorodecyl.ethanoic.acid']; x6=x6[x6!='Total_TG'] # x1;x2;x3;x4;x5;
tv_half_log22=tv_half_log22[,!colnames(tv_half_log22) %in% c('Total_TG','PFAS',"Perfluorodecyl.ethanoic.acid")]

# This needs to be done early on:
colnames(tv)[colnames(tv)=='17aOH-P4']='17a-OHP4'
colnames(tv_half_log22)[colnames(tv_half_log22)=='17aOH-P4']='17a-OHP4'
colnames(tv_all)[colnames(tv_all)=='17aOH-P4']='17a-OHP4'

tv_all=tv_all[,!colnames(tv_all) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
tv_all=tv_all[,!colnames(tv_all) %in% x4]

# In case you would need nonscaled covariates and scaled/logged all other variables:
tv_covscl=tv_all
tv_covNS=cbind(tv[,1:8],tv_all[,9:dim(tv_all)[2]])
tv_LOG_covscl=tv_half_log22
tv_LOG_covNS=cbind(tv[,1:8],tv_half_log22[,9:dim(tv_half_log22)[2]])
colnames(tv_covNS)[1:8]=colnames(tv_all)[1:8]
colnames(tv_LOG_covNS)[1:8]=colnames(tv_all)[1:8]
# This is needed occasionally:
tv_c=tv_covscl 
# https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
# https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub # https://rdrr.io/bioc/qpgraph/man/qpNrr.html

hupo=match(   colnames(tv_c)[colnames(tv_c) %in% groups2[,3]], groups2[,3] ) 
# do ni; https://www.geeksforgeeks.org/how-to-find-index-of-element-in-vector-in-r/
tvauxe=tv_c
colnames(tv_c)[colnames(tv_c) %in% groups2[,3]]=groups2[hupo,2]

# This needs to be done also soon, to gather all the treatment etc. variable names separately...: 
Treatment=colnames(tv_all)[52:58];
Mediator=colnames(tv_all)[9:28];
Outcome=colnames(tv_all)[c(29:51,59:71)]; ##https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/

Outcome=Outcome[!Outcome %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
Outcome=Outcome[! Outcome %in% x4] #https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
Mediator[Mediator=="17aOH-P4"]="17a-OHP4"
Treatment=Treatment[!Treatment %in% c('Perfluorodecyl.ethanoic.acid')]

# tvauxe2=tv_LOG_covscl
# hupo=match(   colnames(tv_LOG_covscl)[colnames(tv_LOG_covscl) %in% groups2[,3]], groups2[,3] ) 
# colnames(tv_LOG_covscl)[colnames(tv_LOG_covscl) %in% groups2[,3]]=groups2[hupo,2]

# save.image('forACMES_thereal.RData')
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")

Setting Global Variables

options(scipen = 999) # Disable scientific notation
# rm(list = ls()) # Clear workspace; this should not be if you have the load above
thedate <- strftime(Sys.Date(), "%d%m%y") #Do not take the old date from the load...
date <- paste0('tikka', thedate) # Customize this as needed

# Example installation commands
# remotes::install_github("fossbert/binilib", force=TRUE)
# install.packages(c('tidyverse', 'tibble'))
# if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install("Maaslin2")
# devtools::install_github("davidsjoberg/ggforestplot")
# remotes::install_version("insight", version = "0.20.5", repos = "http://cran.us.r-project.org", force=TRUE)
# font_import() # Import fonts if not already done
# loadfonts(device = "win") # Load fonts for Windows
# renv::status() # Check renv status
# library(rmarkdown); render("path/to/file.Rmd") # Render R Markdown document
# remove.packages("DelayedArray")
# BiocManager::install("DelayedArray")
# install.packages("Require") # Install 'Require' package

Making Boxplots

#https://r-graph-gallery.com/265-grouped-boxplot-with-ggplot2.html
#https://stackoverflow.com/questions/53724834/why-does-the-plot-size-differ-between-docx-and-html-in-rmarkdownrender

boxplots <- function(tvt, Group, Outcome, Out, oute, other) {
  # Filter data based on gender
  tvt <- tvt %>%
    filter(if (Group == 'Male') Gender == 1 else if (Group == 'Female') Gender == 0 else TRUE)
  
  # Prepare data for plotting
  Steroid <- rep(colnames(tvt[, 9:28]), each = nrow(tvt))
  data2 <- rep('Control', nrow(tvt))
  num <- ifelse(Outcome == 'HOMA-IR', 1.5, min(tvt[[Outcome]]))
  data2[tvt[[Outcome]] > num] <- 'Case'
  Treatment <- data2
  Concentration <- as.vector(unlist(tvt[, 9:28]))
  data <- data.frame(Steroid, Treatment, Concentration)
  data$Group <- 0
  
  # Correct steroid names if the level exists
  if ("17aOH-P4" %in% levels(data$Steroid)) {
    data <- data %>%
      mutate(Steroid = fct_recode(Steroid, '17a-OHP4' = '17aOH-P4'))
  }
  
  # Assign groups
  rownames(groups2) <- 1:20
  for (i in seq_len(nrow(groups2))) {
    data[data$Steroid %in% groups2$Abbreviation[i], 'Group'] <- groups2$Group[i]
  }
  
  # Set plot title
  title <- paste(Out, "'s Effect on Concentrations of Steroids in ", Group, sep = "")
  
  # Define legend labels
  e1 <- ifelse(num == 1.5, paste('Case (>', num, ')', sep = ""), paste('Case (>', 0, ')', sep = ""))
  e2 <- ifelse(num == 1.5, paste('Control (<=', num, ')', sep = ""), paste('Control (=', 0, ')', sep = ""))
  
  # Remove rows with NA concentrations
  data <- data %>% filter(!is.na(Concentration))
  
  # Create boxplot
  p <- ggplot(data, aes(x = Steroid, y = Concentration, fill = Treatment)) +
    geom_boxplot(notch = FALSE, notchwidth = 0.5, outlier.shape = 1, outlier.size = 2, coef = 1.5) +
    theme_classic2() +
    theme(axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2, size = 10.5),
          axis.text = element_text(color = "black"),
          panel.grid.minor = element_blank(),
          text = element_text(size = 10.5, family = "Calibri"),
          axis.title = element_text(size = 14),
          plot.title = element_text(size = 14),
          legend.text = element_text(size = 14),
          legend.title = element_text(size = 14)) +
    labs(x = "Steroids", y = "Log2 of Picomolar Concentrations", title = title) +
    scale_fill_manual(values = c("orange", "blue"), name = oute, labels = c(e1, e2)) +
    facet_grid(~Group, scales = "free_x", space = "free") +
    stat_compare_means(hide.ns = TRUE, label = "p.signif", method = "wilcox.test",
                       symnum.args = list(cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
                                          symbols = c("****", "***", "**", "*", "ns")),
                       size = 8, paired = FALSE, label.y = 15.5)
  
  # Save plot as PNG
  path <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
  pngfile <- fs::path(path, paste0(Group, Out, 'boxe', ".png"))
  knitr::include_graphics(pngfile)
}

# Example usage
tv_half_log22[, '11-KA4'][tv_half_log22[, '11-KA4'] == min(tv_half_log22[, '11-KA4'])] <- median(tv_half_log22[, '11-KA4'])
other <- '261124'
ie <- tv_half_log22
hupo <- match(colnames(ie)[colnames(ie) %in% groups2[, 3]], groups2[, 3])
colnames(ie)[colnames(ie) %in% groups2[, 3]] <- groups2[hupo, 2]
windowsFonts(A = windowsFont("Calibri (Body)"))

# The significance levels are: '****<0.001', '***<0.01', '**<0.05', '*<0.1'
Outcome='Steatosis Grade';Out='Steatosis'; oute='Steatosis Grade';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)

Outcome='Fibrosis Stage';Out='Fibrosis'; oute='Fibrosis Stage';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other) 

# https://www.elsevier.es/en-revista-annals-hepatology-16-articulo-assessment-hepatic-fibrosis-necroinflammation-among-S1665268119314590 #So it is in grade
Outcome='Necroinflammation';Out='Necroinflammation'; oute='Necroinflammation Grade';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)

Outcome='HOMA-IR';Out='HOMA-IR'; oute='HOMA-IR';num=1.5 ;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)

Making Forest Plots

# Define the NAFLD dataset by selecting the first 28 columns from tv
NAFLD <- tv[, 1:28]
# Convert specific columns to binary values using vectorized operations
cols_to_binary <- c(5, 6, 7)
NAFLD[, cols_to_binary] <- (NAFLD[, cols_to_binary] > 0) * 1
# Convert column 8 to binary based on the threshold of 1.5
NAFLD[, 8] <- (NAFLD[, 8] > 1.5) * 1
# Clean column names to remove special characters and make them consistent
patterns <- c("-", "/", "11", "17", "#")
replacements <- c(".", ".", "X11", "X17", ".") #?
# Ensure patterns and replacements are correctly paired
if (length(patterns) == length(replacements)) {
  for (i in seq_along(patterns)) {
    colnames(NAFLD) <- gsub(patterns[i], replacements[i], colnames(NAFLD))}} else {
      stop("Patterns and replacements vectors must be of the same length.")}


# This works with the autoscaled (raw if loge=1 and remove 1 in the means) data NAFLD as well...
pre_errors_2=function(NAFLD,Outcome,Group,name,ordera,oute,first,e,xlim) { # Group='Female'
  
  # Filter data based on the 'Group' variable
  NAFLDo <- switch(Group,
                   "Male" = NAFLD[NAFLD[,'SEX.1F.2M'] == 2, ],
                   "Female" = NAFLD[NAFLD[,'SEX.1F.2M'] == 1, ],
                   "All" = NAFLD)
  
  # Initialize vectors to store sample data and counts
  sample_data <- list()
  n0 <- n1 <- 0

  # Loop through the two groups (Outcome == 0 and Outcome > 0)
  for (i in 1:2) {
    SG0 <- if (i == 1) {
      NAFLDo[NAFLDo[, Outcome] == 0, ]
    } else {
      NAFLDo[NAFLDo[, Outcome] > 0, ]}
    
    # Store the count of samples in each group
    if (i == 1) {
      n0 <- nrow(SG0)
    } else {
      n1 <- nrow(SG0)}
  
  # Calculate medians and standard deviations for columns 9 to 28
  means <- apply(SG0[, 9:28], 2, median, na.rm = TRUE)
  sds <- apply(SG0[, 9:28], 2, sd, na.rm = TRUE)
  
  # Calculate error margins
  error_lower <- means - sds
  error_upper <- means + sds
  error <- sds
  
  # Append results to sample_data
  sample_data[[i]] <- data.frame(study = colnames(NAFLD[, 9:28]),
                                 index = colnames(NAFLD[, 9:28]),
                                 result = means,
                                 error = error)}
  df=data.frame(sample_data) #
  
  # Calculate p-values using Wilcoxon test for columns 9 to 28
  ps <- sapply(9:28, function(j) {
    xnam <- colnames(NAFLDo)[j]
    fmla <- as.formula(paste(xnam, "~", Outcome))
    wilcox.test(fmla, data = NAFLDo, exact = FALSE)$p.value})
    #https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
  
  # Calculate the ratio of results and log-transform
  a <- df[df[, 1] == e, 'result.1'] / df[df[, 1] == e, 'result']
  v2 <- data.frame(log(df$result.1 / df$result))
  
  # Rename columns and clean up variable names
  v2$result <- v2[, 1]
  v2$name <- df$study
  v2 <- v2[, 2:3]
  v2$name <- gsub("\\.", "-", v2$name)
  v2$name <- gsub("X11", "11", v2$name)
  v2$name <- gsub("X17", "17", v2$name)
  v2$name[v2$name == "T-Epi-T"] <- "T/Epi-T"
  v2$pval <- ps

  # Calculate result_pure and error
  v2$result_pure <- df$result.1 / df$result
  v2$error <- (abs((1 / df$result) * df$error.1) + abs((df$result.1 / df$result^2) * df$error)) / nrow(NAFLDo) * 1.64
  
  # Adjust error values
  v2$error <- ifelse(v2$error > (median(v2$error) + sd(v2$error)), median(v2$error) * 1.25, v2$error)
  
  # Calculate error bounds and log-transformed values
  v2$errord1a <- v2$result_pure - v2$error
  v2$errord2a <- v2$result_pure + v2$error
  v2$errord1 <- log(v2$errord1a)
  v2$errord2 <- log(v2$errord2a)
  v2$result <- log(v2$result_pure)
  v2$Control <- df$result
  v2$Case <- df$result.1
  
  # Add p-values and significance
  v2$pval0 <- v2$pval
  v2$pval1 <- v2$pval
  v2$Significance0 <- ifelse(v2$pval0 < 0.1, 'Yes', 'No')
  v2$Color0 <- ifelse(v2$pval0 < 0.1, 'blue', 'grey')
  v2$Significance1 <- ifelse(v2$pval1 < 0.1, 'Yes', 'No')
  v2$Color1 <- ifelse(v2$pval1 < 0.1, 'blue', 'grey')
  
  # Merge with group data and sort
  gn <- groups[groups$Abbreviation != 'F', c('Group', 'Abbreviation')]
  gn <- gn[order(gn$Abbreviation), ]
  v2 <- v2[order(v2$name), ]
  v2 <- cbind(v2, gn[order(gn$Abbreviation), ])
  v2 <- v2[order(-v2$result), ]

  xlab = "Autoscaled Concentrations (SE)" #xlab = "Raw Concentrations in Log10 Scale (SE)"}
  xlim=c(min(v2$errord1),max(v2$errord2)) 
  #Occasionally: xlim=c(min(v2$result)*1.1,max(v2$result)*1.1) # if (xlim[2]>1) {xlim[2]=1};# if (xlim[1] < -0.75) {xlim[1]=-0.75};
  
 # Create forest plot
  plote2 <- forestplot(df = v2,
                       estimate = result,
                       se = 0,
                       pvalue = pval1,
                       psignif = 0.1,
                       xlim = xlim,
                       xlab = 'Logged Ratio between Raw Concentrations of Case and Control with 90% CI',
                       ylab = 'Steroid Groups',
                       title = '',
                       colour = Significance1) +
    ggforce::facet_col(facets = ~Group, scales = "free_y", space = "free", strip.position = 'left') +
    geom_errorbarh(aes(xmin = errord1, xmax = errord2, height = .0, colour = Significance1))
  
  # Set color palette
  hp <- if (sum(v2$Significance1 == 'Yes') == 20) c('blue', 'blue') else c('#999999', 'blue')
  
  # Order factor levels based on Group and first
  if (Group=='All' & first==TRUE) {ordera=rev(groups$Abbreviation)#v2$name[order(v2$result)]; #
  plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)} else if
  (Group=='All' & first==FALSE) {plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)} else if
  (Group=='Female') {plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)} else if
  (Group=='Male') {plote2[["data"]][["name"]]=factor(plote2[["data"]][["name"]], levels = ordera)}
  #https://www.r-bloggers.com/2020/03/how-to-standardize-group-colors-in-data-visualizations-in-r/
  plote2$layers[[1]]$aes_params$odd <- "#00000000" #https://stackoverflow.com/questions/71745719/how-to-control-stripe-transparency-using-ggforestplot-geom-stripes
  
  v2$Group2=v2$Group
  v2 <- transform(v2,Group2 = as.numeric(as.factor(Group2)))
  v2$facet_fill_color <- c("red", "green", "blue", "yellow", "brown")[v2$Group2]
  
  # Create plot with custom themes
  jopon <- plote2 + theme(axis.text.y = element_blank()) + theme_classic2()
  jopon2 <- jopon + geom_point(aes(colour = factor(Significance1)), colour = v2$Color1) +
    scale_color_manual(values = hp) + theme(legend.position = "none") + theme(strip.text.y = element_text(size = -Inf))
  
  # Customize facet strip colors
  g <- ggplot_gtable(ggplot_build(jopon2))
  stripr <- which(grepl('strip-l', g$layout$name))
  fills <- c("red", "green", "blue", "yellow", "brown")
  for (i in seq_along(stripr)) {
    j <- which(grepl('rect', g$grobs[[stripr[i]]]$grobs[[1]]$childrenOrder))
    g$grobs[[stripr[i]]]$grobs[[1]]$children[[j]]$gp$fill <- fills[i]}
  # grid::grid.draw(g)
  
  # Save plot as JPEG and convert to PDF and SVG
  jpeg(paste(name, "divi.jpg"), width = 7500, height = 11000, quality = 100, pointsize = 16, res = 1000)
  print(grid::grid.draw(g))
  dev.off()
  
  daiR::image_to_pdf(paste(name, "divi.jpg"), pdf_name = paste0(paste(name, "divi.jpg"), '.pdf'))
  my_image <- image_read(paste(name, "divi.jpg"))
  my_svg <- image_convert(my_image, format = "svg")
  image_write(my_svg, paste(name, "divi.svg"))
  
  return(ordera) #If you do not want to have 'null' to the Rmarkdown/html take this away
  } 


# This is with first(!!). Use it. 
Outcome='Steatosis.Grade.0.To.3';Out='Steatosis'; oute='Steatosis';first=TRUE; e='P4';ordera=c();
Group='All';name1=paste("Forest plot of",Group, "Steroid Ratios in",Out);
hel=pre_errors_2(NAFLD,Outcome,Group,name1,ordera,oute,first,e,xlim)
# #Afterwards:
first=FALSE;
Group='Female';name2=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name2,ordera=hel,oute,first,e,xlim)
Group='Male'; name3=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name3,ordera=hel,oute,first,e,xlim)
# 
Outcome='Fibrosis.Stage.0.to.4'; Out='Fibrosis';oute='Fibrosis';
Group='All'; name4=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name4,ordera=hel,oute,first,e,xlim)
Group='Female';name5=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name5,ordera=hel,oute,first,e,xlim)
Group='Male'; name6=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name6,ordera=hel,oute,first,e,xlim)
# 
Outcome='Necroinflammation'; Out='Necroinflammation';oute='Necroinflammation';
Group='All'; name7=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name7,ordera=hel,oute,first,e,xlim) #not the very first though...
Group='Female';name8=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name8,ordera=hel,oute,first,e,xlim)
Group='Male'; name9=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name9,ordera=hel,oute,first,e,xlim)
# 
Outcome='HOMA.IR';Out='HOMA-IR';oute='HOMAIR';
Group='All';name10=paste("Forest plot of",Group, "Steroid Ratios in",Out);
pre_errors_2(NAFLD,Outcome,Group,name10,ordera=hel,oute,first,e,xlim) #not the very first though...
Group='Female';name11=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name11,ordera=hel,oute,first,e,xlim)
Group='Male'; name12=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
pre_errors_2(NAFLD,Outcome,Group,name12,ordera=hel,oute,first,e,xlim)
# Fyi: I was able to revise some of the above codes with Copilot...
Forest plot of All Steroid Ratios in Steatosis

Forest plot of All Steroid Ratios in Steatosis

Forest plot of Female Steroid Ratios in Steatosis

Forest plot of Female Steroid Ratios in Steatosis

Forest plot of Male Steroid Ratios in Steatosis

Forest plot of Male Steroid Ratios in Steatosis

Forest plot of All Steroid Ratios in Fibrosis

Forest plot of All Steroid Ratios in Fibrosis

Forest plot of Female Steroid Ratios in Fibrosis

Forest plot of Female Steroid Ratios in Fibrosis

Forest plot of Male Steroid Ratios in Fibrosis

Forest plot of Male Steroid Ratios in Fibrosis

Making Chord Diagrams

# First the correlations for the chord diagrams (both male and female as well as total subjects):

# Copy tvauxe to tv_c
tv_c = tvauxe

# Match column names in tv_c with the third column in groups2 and get their indices
hupo = match(colnames(tv_c)[colnames(tv_c) %in% groups2[, 3]], groups2[, 3])
# Replace matched column names in tv_c with corresponding values from the second column in groups2
colnames(tv_c)[colnames(tv_c) %in% groups2[, 3]] = groups2[hupo, 2]
ok = colnames(tv_c)

# Convert tv_c to a data frame
tv_c = data.frame(tv_c)
# Remove specific columns from tv_c
tv_c = tv_c[, !colnames(tv_c) %in% c('Total_TG', 'PFAS', "Perfluorodecyl.ethanoic.acid")]

# Separate data by gender
tvf = tv_c[tv_c[, 'Gender'] == min(tv_c[, 'Gender']), 1:dim(tv_c)[2]]
tvm = tv_c[tv_c[, 'Gender'] == max(tv_c[, 'Gender']), 1:dim(tv_c)[2]]

# Create a list of data frames for total, female, and male subjects
tvtest = list(tv_c, tvf, tvm)

# Clean up column names in each data frame in tvtest
for (i in 1:3) {
  colnames(tvtest[[i]]) <- gsub("\\.", "-", colnames(tvtest[[i]]))
  colnames(tvtest[[i]]) <- gsub("X11", "11", colnames(tvtest[[i]]))
  colnames(tvtest[[i]]) <- gsub("X17", "17", colnames(tvtest[[i]]))
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "T-Epi-T"] = "T/Epi-T"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "Steatosis-Grade"] = "Steatosis Grade"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "Fibrosis-Stage"] = "Fibrosis Stage"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "17aOH-P4"] = "17a-OHP4"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "HOMA IR"] = "HOMA-IR"
}

# Assign cleaned data frames back to tv_c, tvf, and tvm
tv_c = tvtest[[1]]
tvf = tvtest[[2]]
tvm = tvtest[[3]]

# Rename specific value in x4
x4[x4 == "X7.oxo.DCA_S"] = "X7-oxo-DCA_S"

# Calculate Spearman correlations for total subjects
dat = tv_c
dat = dat %>% select(-c('PatientNumber'))  # Remove 'PatientNumber' column
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r  # Calculate correlation matrix
colnames(resulta) = ok[2:dim(tv_c)[2]]
rownames(resulta) = ok[2:dim(tv_c)[2]]

# Calculate Spearman correlations for female subjects
dat = tvf
dat = dat %>% select(-c('PatientNumber', 'Gender'))  # Remove 'PatientNumber' and 'Gender' columns
resultaf <- (rcorr(as.matrix(dat), type = c('spearman')))$r
colnames(resultaf) = ok[3:dim(tv_c)[2]]
rownames(resultaf) = ok[3:dim(tv_c)[2]]

# Calculate Spearman correlations for male subjects
dat = tvm
dat = dat %>% select(-c('PatientNumber', 'Gender'))  # Remove 'PatientNumber' and 'Gender' columns
resultam <- (rcorr(as.matrix(dat), type = c('spearman')))$r
colnames(resultam) = ok[3:dim(tv_c)[2]]
rownames(resultam) = ok[3:dim(tv_c)[2]]

# Define column groups for different variables
at = colnames(resulta)[1:(length(x1) - 1)]  # Clinicals
bt = colnames(resulta)[(length(at) + 1):(length(at) + length(x2))]  # Steroids
ct = colnames(resulta)[(length(at) + length(bt) + 1):(length(at) + length(bt) + length(x3))]  # BA_l
dt = colnames(resulta)[(length(at) + length(bt) + length(ct) + 1):(length(at) + length(bt) + length(ct) + length(x4))]  # BA_s
et = colnames(resulta)[(length(at) + length(bt) + length(ct) + length(dt) + 1):(length(at) + length(bt) + length(ct) + length(dt) + length(x5))]  # PFAS
ft = colnames(resulta)[(length(at) + length(bt) + length(ct) + length(dt) + length(et) + 1):(length(at) + length(bt) + length(ct) + length(dt) + length(et) + length(x6))]  #

# Store lengths of each group
atl = length(at)
btl = length(bt)
ctl = length(ct)
dtl = length(dt)
etl = length(et)
ftl = length(ft)

# Set significance level for correlations
n_level = 0.01

# Filter correlations based on significance level for total subjects
Nrr = qpNrr(resulta, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resulta)
tes_t = cond * RN
tes_t = as.matrix(tes_t)
resulta = tes_t

# Filter correlations based on significance level for female subjects
Nrr = qpNrr(resultaf, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resultaf)
tes_t = cond * RN
tes_t = as.matrix(tes_t)
resultaf = tes_t

# Filter correlations based on significance level for male subjects
Nrr = qpNrr(resultam, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resultam)
tes_t = cond * RN
tes_t = as.matrix(tes_t)
resultam = tes_t

# Rename 'Gender' column to 'Sex(F-M+)' in correlation matrices
colnames(resulta)[colnames(resulta) == 'Gender'] <- 'Sex(F-M+)'
rownames(resulta)[rownames(resulta) == 'Gender'] <- 'Sex(F-M+)'

# Update column and row names in correlation matrices
colnames(resulta)[2:dim(resulta)[2]] = ok[3:dim(tv_c)[2]]
rownames(resulta)[2:dim(resulta)[2]] = ok[3:dim(tv_c)[2]]
colnames(resultaf)[2:dim(resultaf)[2]] = ok[4:dim(tv_c)[2]]
rownames(resultaf)[2:dim(resultaf)[2]] = ok[4:dim(tv_c)[2]]
colnames(resultam)[2:dim(resultam)[2]] = ok[4:dim(tv_c)[2]]
rownames(resultam)[2:dim(resultam)[2]] = ok[4:dim(tv_c)[2]]

# Function to create chord diagrams for different groups
group_chords <- function(vars, n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f) {
  classes <- 5
  tot <- rownames(resulta)[2:dim(resulta)[1]]
  range <- 1:(a + b + c + e + f)
  layout(matrix(1:1, 1, 1))
  title <- 'Sex'
  genders <- gend
  windowsFonts(A = windowsFont("Calibri (Body)"))
  i <- 1
  tes_t <- vars    
  
  # Set column and row names based on gender
    if (gend == 'All') {
    colnames(tes_t) <- rownames(resulta)
    rownames(tes_t) <- rownames(resulta) } else {
    colnames(tes_t) <- rownames(resultaf)
    rownames(tes_t) <- rownames(resultaf)}
  

  
  # Define groups for different variables
  g1 <- c(rep('Clinical', a), rep('Steroids', b), rep('BA_liver', c), rep('Contaminants', e), rep('Lipids', f))
  
  # Remove self-correlations within each group
  tes_t[1:a, 1:a] <- 0
  tes_t[(a + 1):(a + b), (a + 1):(a + b)] <- 0
  tes_t[(a + b + 1):(a + b + c), (a + b + 1):(a + b + c)] <- 0
  tes_t[(a + b + c + 1):(a + b + c + e), (a + b + c + 1):(a + b + c + e)] <- 0
  tes_t[(a + b + c + e + 1):(a + b + c + e + f), (a + b + c + e + 1):(a + b + c + e + f)] <- 0
  
  # Define group structure and color palette
  group <- structure(g1, names = colnames(tes_t))
  grid.col <- structure(c(rep('#93c29f', a), rep('#a83277', b), rep('red', c), rep('grey', e), rep('black', f)),
                        names = rownames(tes_t))
  
  # Filter and adjust correlation matrix
  tes_t <- tes_t[range, range]
  grid.col <- grid.col[range]
  g <- graph.adjacency(tes_t, mode = "upper", weighted = TRUE, diag = FALSE)
  e <- get.edgelist(g)
  df <- as.data.frame(cbind(e, E(g)$weight))
  df[, 3] <- as.numeric(df[, 3])
  
  # Define color function for edges
  col_fun <- colorRamp2(c(-1, 0, 1), c("blue", 'white', "orange"), transparency = 0.25)
  
  # Remove specified elements from the data frame
  df <- df[!df$V1 %in% rem, ]
  df <- df[!df$V2 %in% rem, ]
  
  # Define legends for the plot
  lgd_group <- Legend(at = gend, type = "points", legend_gp = gpar(col = colors), title_position = "topleft", title = title)
  lgd_points <- Legend(at = unique(g1), type = "points", legend_gp = gpar(col = unique(grid.col)), title_position = "topleft", title = "Class")
  lgd_lines <- Legend(at = c("Positive", "Negative"), type = "points", legend_gp = gpar(col = c('orange', 'blue')), title_position = "topleft", title = "Correlation")
  lgd_edges <- Legend(at = c(-1, 1), col_fun = col_fun, title_position = "topleft", title = "Edges")
  lgd_list_vertical <- packLegend(lgd_group, lgd_points, lgd_lines, lgd_edges)
  
  # Set parameters for the chord diagram
  circos.par(gap.after = 1.5, start.degree = 90)
  chordDiagram(df, annotationTrack = c("grid"), grid.col = grid.col, directional = FALSE, symmetric = TRUE, scale = FALSE,
               link.lwd = 0.3, link.border = "white", order = rownames(tes_t), preAllocateTracks = 1, col = col_fun, transparency = 0.25, big.gap = 10, small.gap = 1)
  
  # Add text and axis to the plot
  circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
    xlim <- get.cell.meta.data("xlim")
    ylim <- get.cell.meta.data("ylim")
    sector.name <- get.cell.meta.data("sector.index")
    circos.text(mean(xlim), ylim[1] + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
    circos.axis(h = "top", labels.cex = 0.000001, major.tick.length = 0.2, sector.index = sector.name, track.index = 2)
  }, bg.border = NA)
  
  # Set font and draw legends
  windowsFonts(A = windowsFont("Calibri (Body)"))
  draw(lgd_list_vertical, x = unit(5, "mm"), y = unit(5, "mm"), just = c("left", "bottom"))
  
  # Save the plot as a JPEG file
  dev.copy(jpeg, paste0(gend, n_level, 'hiee.jpg'), width = 12, height = 12, units = "in", res = 1000)
  dev.off()
  
  # Include the plot in the report and convert to PDF and SVG
  knitr::include_graphics(paste0(gend, n_level, 'hiee.jpg'))
  daiR::image_to_pdf(paste0(gend, n_level, 'hiee.jpg'), pdf_name = paste0(paste0(gend, n_level, 'hie'), '.pdf'))
  my_image <- image_read(paste0(gend, n_level, 'hiee.jpg'))
  my_svg <- image_convert(my_image, format = "svg")
  image_write(my_svg, paste(paste0(gend, n_level, 'hie.jpg'), ".svg"))
}

# All variables
n_level = 0.01
circos.clear()
vars = list(resulta)
big = 'Yes'
title = 'All Variables'
rem = x4
modi = 5
colt = 'black'
a = length(x1) - 1
b = length(x2)
c = length(x3)
d = length(x4)
e = length(x5)
f = length(x6)
gend = c('All')
colors = c('blue')
group_chords(vars[[1]], n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)

# Genderwise:
vars = list(resultaf, resultam)
big = 'No'
title = 'Genders Separated'
rem = x4
modi = 4
colt = 'black'
colors = c('white', 'black')
a = length(x1) - 2
b = length(x2)
c = length(x3)
d = length(x4)
e = length(x5)
f = length(x6)
gend = c('Female')
colors = c('white')
group_chords(vars[[1]], n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)

gend = c('Male')
colors = c('black')
group_chords(vars[[2]], n_level, fig_name, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)

#Copiloting is not working very well here, so I just let it comment some of the lines ...

Making Variance Explained Plots

# Some info regarding of making the data:
# This is it! https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html
#
# https://stats.stackexchange.com/questions/79399/calculate-variance-explained-by-each-predictor-in-multiple-regression-using-r
# https://rdrr.io/github/MRCIEU/TwoSampleMR/man/get_r_from_pn.html
# https://onlinestatbook.com/2/effect_size/variance_explained.html
# https://stackoverflow.com/questions/10441437/why-am-i-getting-x-in-my-column-names-when-reading-a-data-frame
# https://stackoverflow.com/questions/27044727/removing-characters-from-string-in-r
varex_groups_plot <- function(tv_all, Group) {
  # Initialize error flag
  an.error.occured <- FALSE
  tv_all2 <- tv_all
  
  # Check if 'Gender' column exists
  tryCatch({
    tv_all2[, 'Gender']
  }, error = function(e) {
    an.error.occured <<- TRUE
  })
  
  # Determine condition based on Group and presence of 'Gender' column
  cond <- if (an.error.occured) {
    if (Group == 'female') {
      tv_all2[, 'SEX.1F.2M'] == min(tv_all2[, 'SEX.1F.2M'])
    } else if (Group == 'male') {
      tv_all2[, 'SEX.1F.2M'] == max(tv_all2[, 'SEX.1F.2M'])
    } else {
      rep(TRUE, nrow(tv_all2))
    }
  } else {
    if (Group == 'female') {
      tv_all2[, 'Gender'] == min(tv_all2[, 'Gender'])
    } else if (Group == 'male') {
      tv_all2[, 'Gender'] == max(tv_all2[, 'Gender'])
    } else {
      rep(TRUE, nrow(tv_all2))
    }
  }
  
  # Filter data based on condition
  tv_red <- tv_all2[cond, ]
  hep <- tv_red
  colnames(hep)[1:8] <- colnames(tv_red)[1:8]
  
  # Transpose the data for SingleCellExperiment
  tv2 <- t(hep[, 9:ncol(tv_red)])
  
  # Create SingleCellExperiment object
  sce <- SingleCellExperiment(tv2)
  logcounts(sce) <- tv2
  sce@colData <- DataFrame(hep[, 2:8])
  colnames(colData(sce)) <- colnames(tv_all)[2:8]
  colnames(colData(sce))[1] <- 'The Gender'

  # Calculate variance explained
  vars <- getVarianceExplained(sce, variables = colnames(colData(sce))[1:7])
  colVars(vars)

  # Set font and color palette
  windowsFonts(A = windowsFont("Calibri (Body)"))
  mypalette <- scales::hue_pal()(ncol(colData(sce)))
  names(mypalette) <- colnames(tv_all)[2:8]
  
  # Adjust vars if Group is not 'All'
  if (Group != 'All') {
    vars <- vars[, 2:7]
  }
  
  # Plot explanatory variables
  p <- plotExplanatoryVariables(vars) +
    theme(text = element_text(size = 25, family = "Calibri")) +
    theme(axis.text = element_text(size = 20, family = "Calibri"))
  
  # Clean up plot data
  p[[1]] <- p[[1]][!is.na(p[[1]][, 1]), ]
  p[[1]][, 1] <- as.vector(unlist(p[[1]][, 1]))
  p[[1]] <- p[[1]][order(p[[1]][, 1]), ]

  # Save plot as PNG
  path <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
  sips <- paste0(Group, 'vex', ".png")
  pngfile <- fs::path(path, sips)
  agg_png(pngfile, width = 60, height = 36, units = "cm", res = 300, scaling = 2)
  plot(p)
  invisible(dev.off())
  
  # Include plot in report
  knitr::include_graphics(pngfile)
  
  # Convert image to PDF and SVG
  daiR::image_to_pdf(sips, pdf_name = paste0(sips, '.pdf'))
  my_image <- image_read(sips)
  my_svg <- image_convert(my_image, format = "svg")
  image_write(my_svg, paste(sips, ".svg"))
  p
  
}
# Example usage:
varex_groups_plot(tv_all, Group = 'All')

varex_groups_plot(tv_all, Group = 'female')

varex_groups_plot(tv_all, Group = 'male')

#Copilot helped with the commenting here.

Making Heatmap with Effect Sizes

# Function to calculate Cohen's d effect sizes
# https://www.statology.org/cohens-d-in-r/
cohd <- function(NAFLD, tv, Group, Outcome) {
  
  # Filter data based on gender
  if (Group == 'Male') {
    NAFLDo <- NAFLD[NAFLD[, 'Gender'] == max(NAFLD[, 'Gender']), ]
    tva <- tv[tv[, 'SEX.1F.2M'] == max(tv[, 'SEX.1F.2M']), ]
  } else if (Group == 'Female') {
    NAFLDo <- NAFLD[NAFLD[, 'Gender'] == min(NAFLD[, 'Gender']), ]
    tva <- tv[tv[, 'SEX.1F.2M'] == min(tv[, 'SEX.1F.2M']), ]
  } else {
    NAFLDo <- NAFLD
    tva <- tv}
  
  # Check if the Outcome column exists
  if (!Outcome %in% colnames(NAFLDo)) {
    stop("The specified Outcome column does not exist in the data frame.")}
  
  # Filter data based on outcome
  if (Outcome != 'HOMA-IR') {
    SG0 <- NAFLDo[NAFLDo[, Outcome] == min(NAFLDo[, Outcome]), ]
    SG1 <- NAFLDo[NAFLDo[, Outcome] > min(NAFLDo[, Outcome]), ]
  } else {
    SG0 <- NAFLDo[tva[, 'HOMA-IR'] <= 1.5, ]
    SG1 <- NAFLDo[tva[, 'HOMA-IR'] > 1.5, ]}
  
  # Initialize vector to store Cohen's d values
  cd <- numeric(20)
  
  # Calculate Cohen's d for each variable
  for (i in 1:20) {
    group1 <- SG0[, i + 8]
    group2 <- SG1[, i + 8]
    
    data <- data.frame(
      value = c(group1, group2),
      group = factor(rep(c("group1", "group2"), c(length(group1), length(group2)))))
    
    result <- cohen.d(value ~ group, data = data)
    cd[i] <- result$cohen.d[2]}
  
  return(cd)}

# Initialize an empty vector
d <- c()

# Define the dataset
NAFLD <- tv_all

# Function to calculate Cohen's d for different groups and outcomes
calculate_cohd <- function(outcome) {
  Group <- 'All'
  a <- cohd(NAFLD, tv, Group, outcome)
  Group <- 'Female'
  b <- cohd(NAFLD, tv, Group, outcome)
  Group <- 'Male'
  c <- cohd(NAFLD, tv, Group, outcome)
  cbind(a, b, c)}

# Calculate Cohen's d for different outcomes and combine results
outcomes <- c('Steatosis Grade', 'Fibrosis Stage', 'Necroinflammation', 'HOMA-IR')
for (outcome in outcomes) {
  d <- cbind(d, calculate_cohd(outcome))}

# Set row names
rownames(d) <- colnames(tv_all[, 9:28])

# Create column names with groups and outcomes
colnames(d) <- unlist(lapply(outcomes, function(outcome) {
  paste(c('All', 'Female', 'Male'), outcome, sep = "_")}))

# Save the results to a CSV file
write.csv(d, paste0('cohens_da_', thedate, '.csv'))

# Convert the results to a data frame
n <- d
x <- data.frame(n)
row.names(x) <- colnames(tv_all[, 9:28])

# Adjust group names
groups <- groups
groups[groups == "17a-OHP4"] <- "17aOH-P4"
op <- groups[order(groups$Group), 'Abbreviation']
op <- op[op %in% row.names(x)]
x <- x[op, ]

# Function to create breaks for the heatmap
brks_heatmap <- function(mat, color_palette) {
  rng <- range(mat, na.rm = TRUE)
  lpal <- length(color_palette)
  c(seq(rng[1], 0, length.out = ceiling(lpal / 2) + 1),
    seq(rng[2] / dim(mat)[1], rng[2], length.out = floor(lpal / 2)))}

# Define color palette
color_palette <- colorRampPalette(c('blue', 'white', 'orange'), alpha = TRUE)(150)

# Save heatmap as JPEG
jpeg(paste0("cohensd_e2", date, ".jpg"), width = 9, height = 12, units = "in", res = 1000)

# Set viewport for the heatmap
setHook("grid.newpage", function() pushViewport(viewport(x = 1, y = 1, width = 0.9, height = 0.9, name = "vp", just = c("right", "top"))), action = "prepend")

# Generate heatmap
pheatmap(x, cluster_cols = FALSE, cluster_rows = FALSE, breaks = brks_heatmap(x, color_palette), color = color_palette, column_names_side = "bottom", angle_col = 90)

# Reset viewport hook
setHook("grid.newpage", NULL, "replace")

# Add text annotations
grid.text("Steatosis, Fibrosis, Necroinflammation, HOMA-IR", y = -0.07, x = 0.4, gp = gpar(fontsize = 16))
grid.text("Steroids (Androgens, Estrogens, Gluc., Mineraloc., Progestogens)", 
          x = -0.07, rot = 90, gp = gpar(fontsize = 16))

# Save the plot again for quality reasons
dev.copy(jpeg, paste0("cohensd_e2", date, ".jpg"), width = 9, height = 12, units = "in", res = 1000)
dev.off()

# Include the image in the document
knitr::include_graphics(paste0("cohensd_e2", date, ".jpg"));dev.off()

# Convert image to PDF
daiR::image_to_pdf(paste0("cohensd_e2", date, ".jpg"), pdf_name = paste0("cohensd_e2", date, ".pdf"));#dev.off()

# Convert image to SVG
my_image <- image_read(paste0("cohensd_e2", date, ".jpg"))
my_svg <- image_convert(my_image, format = "svg")
image_write(my_svg, paste0("cohensd_e2", date, ".svg"))

# Fyi: Revising with Copilot the above codes works partially...

Making Heatmaps with Correlations

#The correlations (ok):
#Correlation matrices:
#http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram
#squares are good for individual associations, because the order is the same
# More info regarding the function:
# https://jokergoo.github.io/circlize_book/book/legends.html
# https://cran.r-project.org/web/packages/ggplotify/vignettes/ggplotify.html
# https://bioinfo4all.wordpress.com/2021/03/13/tutorial-7-how-to-do-chord-diagram-using-r/
# https://jokergoo.github.io/circlize_book/book/advanced-usage-of-chorddiagram.html
# https://jokergoo.github.io/circlize_book/book/a-complex-example-of-chord-diagram.html

tv_c=tv_covscl#tv_half_log22 #cbind(tv[,1:8], tv_half_log2) #check also not logged and then the auto one
tv_c=tv_c[,c(1:3,4:(dim(tv_c)[2]))]#dim(tv_c)[2],
tv_c=tv_c[,!colnames(tv_c) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
tv_c=tv_c[,!colnames(tv_c) %in% x4]
colnames(tv_c)[colnames(tv_c)=="17aOH-P4"]="17a-OHP4"
tvf=tv_c[tv_c[,'Gender']==min(tv_c[,'Gender']),1:dim(tv_c)[2]] #tv['Steatosis.Grade.0.To.3'==0,9:27]] #tv[tv[,'Necroinflammation']==0,9:80]; #SG0i=as.numeric(SG0i); check also: tv[tv[,'HOMA-IR']==0,9:80]
tvm=tv_c[tv_c[,'Gender']==max(tv_c[,'Gender']),1:dim(tv_c)[2]]


# rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}
dat = tv_c; 
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #SEX.1F.2M
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson # intersect(colnames(resulta), rownames(resulta)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.a=rcorr(as.matrix(dat), type = c('spearman'))$P; 
p.mat.a[is.na(p.mat.a)]=1; 
p.mat.aa=matrix(p.adjust(p.mat.a,method="BH"),nrow=dim(p.mat.a)[1],ncol=dim(p.mat.a)[2]); 
rownames(p.mat.aa)=rownames(p.mat.a);colnames(p.mat.aa)=colnames(p.mat.a)
# write.csv(resulta,'MASLD_steroid_study correlations with spearman_log_tikka12424.csv')
# resulta=dat
dat=tvf; # 
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #SEX.1F.2M
resultaf <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson# intersect(colnames(resultaf), rownames(resultaf)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.f=rcorr(as.matrix(dat), type = c('spearman'))$P
p.mat.f[is.na(p.mat.f)]=1; 
p.mat.ff=matrix(p.adjust(p.mat.f,method="BH"),nrow=dim(p.mat.f)[1],ncol=dim(p.mat.f)[2]); 
rownames(p.mat.ff)=rownames(p.mat.f);colnames(p.mat.ff)=colnames(p.mat.f)
# write.csv(resultaf,'MASLD_steroid_study female correlations with spearman_log_tikka12424.csv')
dat=tvm;  # 
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #dat= dat %>% select(-c('Gender')) #this is quite nice way to delete columns, please remember...
resultam <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson # intersect(colnames(resultam), rownames(resultam)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.m=rcorr(as.matrix(dat), type = c('spearman'))$P
p.mat.m[is.na(p.mat.m)]=1; 
p.mat.mm=matrix(p.adjust(p.mat.m,method="BH"),nrow=dim(p.mat.m)[1],ncol=dim(p.mat.m)[2]); 
rownames(p.mat.mm)=rownames(p.mat.m);colnames(p.mat.mm)=colnames(p.mat.m)
# write.csv(resultam,'MASLD_steroid_study male correlations with spearman_log_tikka12424.csv')
resulta[resulta==1]=0
resultam[resultam==1]=0
resultaf[resultaf==1]=0;#min(resultaf);max(resultaf);
n_level=1



# https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
order="original" #alphabet, hclust, original #https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
range='orig';corre='re_renormae'; method='color' #color square
jpeg(paste("Correlations with Full Plot of All_vok_nes",n_level,order,range,corre,method,".jpg"), width = 8000, height = 8000, quality = 100,pointsize = 23, res=300);
corrplot(resulta, type = "lower", order = order,method=method, tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #,is.corr = FALSE
dev.off();
eoh=paste("Correlations with Full Plot of All_vok_nes",n_level,order,range,corre,method,".jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
corrplot(resulta, type = "lower", order = order,method=method, tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #,is.corr = FALSE

jpeg(paste("Correlations with Full Plot of Female_voek",n_level,order,range,corre,method,".jpg"), width = 8000, height = 8000, quality = 100,pointsize = 23, res=300);
corrplot(resultaf, type = "lower", order = order,method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE)
dev.off();
eoh=paste("Correlations with Full Plot of Female_voek",n_level,order,range,corre,method,".jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
corrplot(resultaf, type = "lower", order = order,method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE)

jpeg(paste("Correlations with Full Plot of Male_voeka",n_level,order,range,corre,method,".jpg"), width = 8000, height = 8000, quality = 100,pointsize = 23, res=300);
corrplot(resultam, type = "lower", order = order, method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #order = "alphabet",  order = "hclust",
dev.off();
eoh=paste("Correlations with Full Plot of Male_voeka",n_level,order,range,corre,method,".jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
corrplot(resultam, type = "lower", order = order, method=method,tl.col = "black", tl.srt = 90, diag = FALSE,col = rev(COL2('RdBu')),is.corr = FALSE) #order = "alphabet",  order = "hclust",

#The ok ones:
x1=colnames(resulta)[c(1:6)]
x1=c(x1[3:6],x1[1],x1[2])#x1[2],
x5=x5[!x5=='Perfluorodecyl.ethanoic.acid']
colnames(resulta)[colnames(resulta)=="17aOH-P4"]="17a-OHP4"
colnames(p.mat.a)[colnames(p.mat.a)=="17aOH-P4"]="17a-OHP4"
x2[x2=="17aOH-P4"]="17a-OHP4"
x2=x2[order(match(x2,groups[,2]))] #https://stackoverflow.com/questions/1568511/how-do-i-sort-one-vector-based-on-values-of-another
x5=x5[!x5=='Perfluorodecyl.ethanoic.acid']
#
# resulta1=resulta[c(x1,x2),x5];p.mat.a1=p.mat.aa[c(x1,x2),x5]
# resulta2=resultaf[c(x1,x2),x5];p.mat.f1=p.mat.ff[c(x1,x2),x5]
# resulta3=resultam[c(x1,x2),x5];p.mat.m1=p.mat.mm[c(x1,x2),x5]
# # tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
# hip1='transposesa_kaikki scale';width = 2400;height=6000;pch.cex=1.2;
# ho='PFAS vs. clinical factors and steroids'
# resulta1=t(resulta1);resulta2=t(resulta2);resulta3=t(resulta3)
# p.mat.a1=t(p.mat.a1);p.mat.f1=t(p.mat.f1);p.mat.m1=t(p.mat.m1)
# width = 6000;height=2800;
#
resulta1=resulta[c(x3,x6),x5];p.mat.a1=p.mat.aa[c(x3,x6),x5]
resulta2=resultaf[c(x3,x6),x5];p.mat.f1=p.mat.ff[c(x3,x6),x5]
resulta3=resultam[c(x3,x6),x5];p.mat.m1=p.mat.mm[c(x3,x6),x5]
# tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
hip1='transpose';width = 2400;height=6000;pch.cex=1.2;ho='PFAS vs. BAs and lipids'
resulta1=t(resulta1);resulta2=t(resulta2);resulta3=t(resulta3)
p.mat.a1=t(p.mat.a1);p.mat.f1=t(p.mat.f1);p.mat.m1=t(p.mat.m1)
width = 9000;height=2800;

resulta1[resulta1 >0.4] = 0.4
resulta1[resulta1 < -0.4] = -0.4

resulta2[resulta2 >0.4] = 0.4
resulta2[resulta2 < -0.4] = -0.4

resulta3[resulta3 >0.4] = 0.4
resulta3[resulta3 < -0.4] = -0.4

# hist(as.numeric(unlist(resulta1)),breaks=30,ylim=c(0.0,40))  #xlim=c(0.04,0.4),
# resulta1[resulta1 > 1]  = 1
# resulta1[resulta1 < -1] = -1 #col.lim=c(-0.4,0.4))
#
# #https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# #https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
# #https://statisticsglobe.com/change-font-size-corrplot-r
# #order can be: alphabet, hclust, original #https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
#
order="original"; range='orig';corre='no_rendorm'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100)
cl.offset=1.0;cl.length=5;cl.cex = 1.3;pch.cex=1.3;pch=20;cl.pos = 'r';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
jpeg(paste("Square Correlation Plot ofdd",ho,ga,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 30, res=300);# par( ps=ps)# par(cex.lab=90)
corrplot(resulta1, type = type, order = order,method=method, p.mat=p.mat.a1, tl.col = "black", #sum(COL2('RdBu')=="#FF7417")
         cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,#pitikö vain pch lisätä pch väriin väriin... mystistä...'#FEE12B'
sig.level = c(.001,.05, .2),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE, col.lim=c(-0.4,0.4) ) #only in age...0.001,
dev.off();eoh=paste("Square Correlation Plot ofdd",ho,ga,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
# pch.cex=1.3;
jpeg(paste("Square Correlation Plot ofd",ho,gf,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 30, res=300);
corrplot(resulta2, type = type, order = order,method=method, p.mat=p.mat.f1,tl.col = "black",
         cl.cex = cl.cex,  pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .05, .2), cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE,col.lim=c(-0.4,0.4)) #
dev.off();eoh=paste("Square Correlation Plot ofd",ho,gf,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
# pch.cex=2.9;
jpeg(paste("Square Correlation Plot ofd",ho,gm,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 30, res=300);
corrplot(resulta3, type = type, order = order,method=method, p.mat=p.mat.m1, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,
         pch.col='black',pch=pch,
sig.level = c(.001, .05, .2),cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE,col.lim=c(-0.4,0.4)) #,is.corr = FALSE
dev.off();eoh=paste("Square Correlation Plot ofd",ho,gm,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
#
#
resulta1=resulta[c(x1,x3,x6),x2];  p.mat.a1=p.mat.aa[c(x1,x3,x6),x2]
resulta2=resultaf[c(x1,x3,x6),x2]; p.mat.f1=p.mat.ff[c(x1,x3,x6),x2]
resulta3=resultam[c(x1,x3,x6),x2]; p.mat.m1=p.mat.mm[c(x1,x3,x6),x2]
# tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
hip1='transpose'; width = 3700;height=6300;ho='steroids vs. all others except PFAS';ps=28 #pch=10;
# min(c(resulta2)); max(c(resulta2)) #These are around -0.4 and 0.4
col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100)

# path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path)
resulta1[resulta1 >0.5] = 0.5
resulta1[resulta1 < -0.5] = -0.5

resulta2[resulta2 >0.5] = 0.5
resulta2[resulta2 < -0.5] = -0.5

resulta3[resulta3 >0.5] = 0.5
resulta3[resulta3 < -0.5] = -0.5
#
#
# resulta1=resulta[c(x3,x6),x2];  p.mat.a1=p.mat.aa[c(x3,x6),x2]
# resulta2=resultaf[c(x3,x6),x2]; p.mat.f1=p.mat.ff[c(x3,x6),x2]
# resulta3=resultam[c(x3,x6),x2]; p.mat.m1=p.mat.mm[c(x3,x6),x2]
# # tv_ah=rango(resulta3,(min(resulta2)),max(resulta2)); resulta3=tv_ah;#
# resulta1=t(resulta1);resulta2=t(resulta2);resulta3=t(resulta3);p.mat.a1=t(p.mat.a1);p.mat.f1=t(p.mat.f1);p.mat.m1=t(p.mat.m1);
#
# # resulta1[resulta1 > 0.25] = 0.4
# # resulta1[resulta1 < -0.25] = -0.4
# hist(as.numeric(unlist(resulta1)),breaks=30,ylim=c(0.0,40))  #xlim=c(0.04,0.4),
#
hip1='transpose'; width = 3200;height=2000;ho='steroids vs. all others except PFAS';ps=12 #pch=10;
min(c(resulta2)); max(c(resulta2)) #These are around -0.4 and 0.4
col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)

# resulta1=resulta1[,groups[,'Abbreviation']];resulta2=resulta2[,groups[,'Abbreviation']];resulta3=resulta3[,groups[,'Abbreviation']]

order="original"; range='orig';corre='no_renorm'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
cl.offset=1.0;cl.length=5;cl.cex = 1.0;pch.cex=1.0;pch=20;cl.pos = 'r';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
jpeg(paste("Square Correlation Plot ofrra",ho,ga,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 14, res=300);# par( ps=ps)# par(cex.lab=90)
corrplot(resulta1, type = type, order = order,method=method, p.mat=p.mat.a1, tl.col = "black", #sum(COL2('RdBu')=="#FF7417")
         cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,#pitikö vain pch lisätä pch väriin väriin... mystistä...'#FEE12B'
sig.level = c(.001,.05, .2),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,is.corr = FALSE,col.lim=c(-0.5,0.5)) #only in age...0.001, is.corr = TRUE/FALSE rev(COL2('RdBu')[1:(length(COL2('RdBu'))-0)])
dev.off();eoh=paste("Square Correlation Plot ofrr",ho,ga,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

pch.cex=1.3;
jpeg(paste("Square Correlation Plot ofa",ho,gf,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);
corrplot(resulta2, type = type, order = order,method=method, p.mat=p.mat.f1,tl.col = "black",
         cl.cex = cl.cex,  pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .05, .2), cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,,is.corr = FALSE,col.lim=c(-0.5,0.5)) #
dev.off();eoh=paste("Square Correlation Plot of",ho,gf,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

# pch.cex=2.9;
jpeg(paste("Square Correlation Plot ofa",ho,gm,hip1,"3.jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);
corrplot(resulta3, type = type, order = order,method=method, p.mat=p.mat.m1, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,
         pch.col='black',pch=pch,
sig.level = c(.001, .05, .2),cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,
tl.srt = 90, diag = TRUE,col = col,,is.corr = FALSE,col.lim=c(-0.5,0.5)) #,is.corr = FALSE
dev.off();eoh=paste("Square Correlation Plot of",ho,gm,hip1,"3.jpg")
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

#Tiedoks: Copilotilla ei järkee edelliseen, vaikka näyttää hieman siistimmältä.

Making Heatmaps with Linear Model Estimates

# You may need a rather big function to calculate the estimates and plot at the same time, since the spaces of exper. interest have been reduced from the max dataset size.

# Eli maksimilla vedetään... eli pitäis olla ok, sillä skaalattu vastaa korrelaatiota tss. skaalaus vielä miehiin...
the_funal=function(tv,Group,ok,fn,adj,sig.level,sick,sick_group,joo) { #  ok,aa,bb
  tv <- tv_covscl
  if (Group == 'male') {
    NAFLDo <- tv[tv[, 'Gender'] == max(tv[, 'Gender']), ]
  } else if (Group == 'female') {
    NAFLDo <- tv[tv[, 'Gender'] == min(tv[, 'Gender']), ]
  } else if (Group == 'All') {
    NAFLDo <- tv
  }
  
  SG0 <- NAFLDo[, c(2:dim(tv)[2])]
  oknames <- colnames(SG0)
  SG0 <- data.frame(SG0)
  colnames(SG0[, 8:27]) <- gsub("-", ".", colnames(SG0[, 8:27]))
  colnames(SG0[, 8:27]) <- gsub("/", ".", colnames(SG0[, 8:27]))
  
  hesh <- data.frame()
  
  Treatment <- colnames(tv_all)[52:58]
  Mediator <- colnames(tv_all)[9:28]
  Outcome <- colnames(tv_all)[c(29:51, 59:71)]
  Treatment2 <- Treatment  # Define Treatment2
  
  xnam_list <- list(colnames(SG0)[c(4:7)], colnames(SG0)[c(2)], colnames(SG0)[c(3)], Treatment2, c(x3, x6), c('AGE', 'BMI', colnames(SG0)[c(4:7)]), c(x3, x6))
  y_list <- list(Treatment2, Treatment2, Treatment2, colnames(SG0[, 8:27]), colnames(SG0[, 8:27]), colnames(SG0[, 8:27]), colnames(tv_all)[52:58])
  
  for (k in 1:length(xnam_list)) {
    xnam <- xnam_list[[k]]
    y <- y_list[[k]]
    for (i in 1:length(xnam)) {
      for (j in 1:length(y)) {
        if (Group != 'All') {
          fmla <- as.formula(paste(paste(c(y[j], " ~ "), collapse = ""), paste(c(xnam[i], 'BMI', 'AGE'), collapse = "+")))
        } else if (Group == 'All') {
          fmla <- as.formula(paste(paste(c(y[j], " ~ "), collapse = ""), paste(c(xnam[i], 'BMI', 'AGE', 'Gender'), collapse = "+")))
        }
        poissone <- lm(fmla, data = SG0)
        pss <- summary(poissone)[[4]]
        hoesh <- c(y[j], xnam[i], Group, pss[2, 1], pss[2, 4], pss[2, 2])
        hesh <- rbind(hesh, hoesh)
      }
    }
  }
  
  hesh <- as.data.frame(hesh)
  colnames(hesh) <- c('y', 'x', 'Gender', 'r', 'p', 'var_x')
  
  hesa=hesh
  hoi=as.data.frame(hesh)
  
  
  hopiu=hoi
  colnames(hopiu)=c('y','x','Gender','r','p','var_x')
  colnames(hoi)=c('y','x','Gender','r','p','var_x')
  
  #This in case you want to print to your local computer: ... :)
  # main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn),collapse="")
  main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//"),collapse="")
  setwd(main_dir)
  
  meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
  covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
  
  if (adj=='ok') {
    # p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
    hoi[,5]=p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
    hopiu[,5]=p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
  }
  
  if (ok=='big') {
    
    rsa=c();joi=c()
    
    # Eli 'kaksi' vielä tarvitaan...
    # 1) BA/lipid=covar ja steroidit, ja 2) steroid=covar

    meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
    covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
    
    c1=hoi[,2] %in% covas
    c2=hoi[,1] %in% meds
    hyy=c1 & c2
    m1=hoi[hyy,]
    colnames(m1)=c('y','x','Gender','r','p','var_x') #c('y','x','Gender','r','p','radj')
    c1=hoi[,2] %in% c(x3,x6); c2=hoi[,1] %in% meds
    hyy=c1 & c2; m2=hoi[hyy,]
    colnames(m2)=c('y','x','Gender','r','p','var_x') # hist(as.numeric(m2[,6]),breaks=50)
    joi=rbind(m1,m2)
    i=4;rs=c()
    
    for (i in 4:5) {
      rs=joi[,c(1,2,i)] # rs=data.frame(rs)
      rs=reshape(rs,idvar="x",timevar="y",direction="wide")
      rownames(rs)=rs[,1]
      rs=rs[,-1]

      library(stringr)
      colnames(rs)=str_sub(colnames(rs),3,-1)
      
      colnames(rs) <- gsub("\\.", "-", colnames(rs))
      colnames(rs) <- gsub("X11", "11", colnames(rs))
      colnames(rs) <- gsub("X17", "17", colnames(rs))
      colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
      
      rownames(rs)[rownames(rs)=="Steatosis.Grade"]="Steatosis Grade"
      rownames(rs)[rownames(rs)=="Fibrosis.Stage"]="Fibrosis Stage"
      rownames(rs)[rownames(rs)=="HOMA.IR"]="HOMA-IR"
      covas[covas=="Steatosis.Grade"]="Steatosis Grade"
      covas[covas=="Fibrosis.Stage"]="Fibrosis Stage"
      covas[covas=="HOMA.IR"]="HOMA-IR"
      
      heps=c(groups[,2]) #check that you have driven the steroid data vis file...
      heps[heps=="17aOH-P4"]="17a-OHP4"
      cme1=match(heps,colnames(rs))
      cme2=match(c(covas,x3,x6),rownames(rs))
      rs=rs[cme2,cme1]
      rsa=rbind(rsa,rs) }
    
    
    rs1a=rsa[1:dim(rs)[1],];
    rs2a=rsa[(dim(rs1a)[1]+1):(dim(rs1a)[1]+dim(rs1a)[1]),]
  
    rs1=rs1a;rs2=rs2a
    rownames(rs2)=str_sub(rownames(rs2), end = -2)
    rownames(rs1) <- gsub("\\.", " ", rownames(rs1))
    rownames(rs2) <- gsub("\\.", " ", rownames(rs2))
    rownames(rs1)[rownames(rs1)=="HOMA IR"]="HOMA-IR";rownames(rs2)[rownames(rs2)=="HOMA IR"]="HOMA-IR"
    
    rownames(rs1)[rownames(rs1)=="Gender"]="HOMA-IR";rownames(rs2)[rownames(rs2)=="HOMA IR"]="HOMA-IR"
    
    rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}
    rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
    rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
    # rs1=rango(rs1,-0.5,0.5) #check this if needed
  
    rs1=as.matrix(rs1)
    rs2=as.matrix(rs2)
    
        rs1[rs1>0.5]=0.5;rs1[rs1 < -0.5]=-0.5;

    
    width=2500; height=4400
    order="original"; range='orig';corre='no_renorm'; type='full'; method='color';#ga='All';gf='Female';gm='Male' #color square
    cl.offset=1.0;cl.length=15;cl.cex = 1.09;pch.cex=1.09;pch=11;cl.pos = 'n';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
    ho=Group;hip1='BAs_lipids_as_y vs. steroids_as_x'

    # https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
    # Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
    # https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing

#I have driven these separately for html: 
    # for that you need:     
    jpeg(paste("Linear Model Estimate Plot ofees5",hip1,Group,".jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);

    hepio=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150) #rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)])
    
    
    
    corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch, ,sig.level = c(.001, .01, .05),cl.pos = cl.pos, 
             insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,tl.cex=0.5, tl.srt = 90, diag = TRUE, #tl.pos='n'
             col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100) ,is.corr = FALSE,col.lim=c(-0.5,0.5));   #https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html#change-color-spectra-color-legend-and-text-legend
    
    dev.off()  #,is.corr = FALSE
    eoh=paste("Linear Model Estimate Plot ofees5",hip1,Group,".jpg")
    daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
    my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

    #

    #https://stackoverflow.com/questions/26574054/how-to-change-font-size-of-the-correlation-coefficient-in-corrplot
    #https://stackoverflow.com/questions/9543343/plot-a-jpg-image-using-base-graphics-in-r
   #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2

    
  } else {
    
    rsa=c();rs1=c();rs2=c()
  
    c1=hoi[,1] %in% x5
    hoi[c1,] # This gives you the PFAS (x5) ok. Tää tulee suoraan tällä PFAS 7:lle ekalle
    c2=hoi[,1] %in% names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)] #vaikeemman kautta
    hyy=c1 & c2
    hoi2=hoi[c2 | c1 ,] #Likewise...
    rownames(hoi2)=1:dim(hoi2)[1]
    hoi2=hoi2[1:182,]
    a=hoi2[1:42,1];b=hoi2[1:42,2]
    hoi2[1:42,]=cbind(b,a,hoi2[1:42,3:5])    
    hoi2=hoi2[,c(2,1,3:5)] 
    
    i=4;
    rse=c()
    for (i in 4:5) {
      
      rse=hoi2[,c(1,2,i)] 
      rse=rse[order(rse[,1]),]
      rs=reshape(rse,idvar="x",timevar="y",direction="wide")
      rownames(rs)=rs[,1]
      rs=rs[,-1]
      
      library(stringr) 
      colnames(rs)=str_sub(colnames(rs),3,-1)

      colnames(rs) <- gsub("\\.", "-", colnames(rs))
      colnames(rs) <- gsub("X11", "11", colnames(rs))
      colnames(rs) <- gsub("X17", "17", colnames(rs))
      colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
      colnames(rs)[colnames(rs)=="T-E-T"]="T/Epi-T"
      colnames(rs)[colnames(rs)=="Steatosis-Grade"]="Steatosis Grade"
      colnames(rs)[colnames(rs)=="Fibrosis-Stage"]="Fibrosis Stage"
      colnames(rs)[colnames(rs)=="17aOH-P4"]="17a-OHP4"
      heps=c(covas,groups[,2])
      heps <- gsub("\\.", " ", heps)
      heps[heps=="HOMA IR"]="HOMA-IR"
      heps[heps=="17aOH-P4"]="17a-OHP4"
      ccc=match(heps,colnames(rs))

      rs=rs[,ccc]
      rsa=rbind(rsa,rs) 
    }
    
    rs1a=rsa[1:7,];
    rs2a=rsa[8:14,]
    rs1=rs1a;rs2=rs2a
    
    

    rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
    rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
    # rs1=rango(rs1,-0.5,0.5) #check if needed

    rs1=as.matrix(rs1)
    rs2=as.matrix(rs2)
    
    rs1[rs1>0.4]=0.4;rs1[rs1 < -0.4]=-0.4;

  order="original"; range='orig';corre='no_renorm'; type='full'; method='color'; #ga='All';gf='Female';gm='Male' #color square
  cl.offset=1.0;cl.length=11;cl.cex = 1.4;pch.cex=1.5;pch=20;cl.pos = 'n';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
  ho=Group;hip1='Steroids_y vs. PFAS_as_x'
  width=5500; height=1800
#I have driven these separately for html: 

  
  
  
jpeg(paste("Linear Model Estimate Plot of_sa5",hip1,Group,".jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);
  corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .01, .05),cl.pos =  cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length, tl.cex=0.8, #tl.pos='n',
tl.srt = 90, diag = TRUE,col = colorRampPalette(c('blue','white', 'orange'), alpha = TRUE)(100),is.corr = FALSE,col.lim=c(-0.4,0.4)); 

# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html#change-color-spectra-color-legend-and-text-legend
dev.off();
    eoh=paste("Linear Model Estimate Plot of_sa5",hip1,Group,".jpg")
    daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
    my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))


  } 
  

  return(list(hopiu))
}

# The scaling here just in case:
rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}

#To apply to all groups at one go:
huus=function(tv,adj,sig.level,sick,sick_group,joo) {
  huus=c();huusa=c();heijaa=c('All','female','male'); ok=c('big','small')  ; jj=c()
  hyp=1;hrt=1;#oo="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/lme/"
  for (hyp in 1:2) {
    for (hrt in 1:3) {
      # the_funal=function(tv,Group,ok,aa,bb,fn,adj)
      huus=append(huus,the_funal(tv,heijaa[hrt],ok[hyp],fn,adj,sig.level,sick,sick_group,joo))}}

  return(huus)}

# Driving the function with the parameters as follows:
adj='nook'; sig.level=c(.001,0.01, 0.05); sick='no'; joo='joo' #sickGroup..
metanorm_S_non_fdr=huus(tv_all,adj,sig.level,sick,sick_group,joo) #This prints to your current folder 'Steroid_Data_Analysis

# Ei mee tääkän ihan nopeesti läpi ChatGPT:llä saati Copilotkaan.
# Tää olis kiva saada paremmaks, joten jätän tämän OS. (Funktiota.R löytyy kansiosta, siellä lisää, yks minitoimiva on.)
Heatmap of LMEs (linear model estimates) from Steroids vs. BAs & Lipids & Covariates with All Subjects

Heatmap of LMEs (linear model estimates) from Steroids vs. BAs & Lipids & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Male Subjects

Making Scatter Plots with Models

# This should be as good as it gets... eli  maksimilla vedetään... eli pitäis olla ok, sillä skaalattu vastaa korrelaatiota tss. skaalaus vielä miehiin...
# This looks similar as the linear model code, but is not exactly so. With this you can draw a scatter plot for each individual combination of two variables within dataset
the_fun_figs=function(tv_all, Group, hopiu, aa, bb, fn) {
  # Filter data based on the specified group
  if (Group == 'male') {
    NAFLDo = tv_all[tv_all[,'Gender'] == max(tv_all[,'Gender']),]
  } else if (Group == 'female') {
    NAFLDo = tv_all[tv_all[,'Gender'] == min(tv_all[,'Gender']),]
  } else if (Group == 'All') {
    NAFLDo = tv_all
  }
  
  # Select relevant columns from the filtered data
  SG0 = NAFLDo[, c(2:dim(tv_all)[2])]
  
  # Fix column names by replacing spaces and special characters
  oknames = colnames(SG0)
  SG0 = data.frame(SG0)
  colnames(SG0[, 8:27]) <- gsub("-", ".", colnames(SG0[, 8:27]))
  colnames(SG0[, 8:27]) <- gsub("/", ".", colnames(SG0[, 8:27]))
  
  hesh = c()
  xnam = colnames(SG0)[c(4:7)]
  Treatment = colnames(tv_all)[52:58]
  y = Treatment
  TreatmentN = Treatment
  
  # Loop through each combination of xnam and y to calculate correlations
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      # Extract relevant rows from hopiu based on conditions
      hösh = hopiu[hopiu[, 1] == y[j] & hopiu[, 2] == xnam[i] & hopiu[, 3] == Group,]
      
      # Prepare data for plotting
      jeps = SG0
      r = as.numeric(hösh[4][1,])
      p.val = as.numeric(hösh[5][1,])
      rsadj = as.numeric(hösh[6][1,])
      colnames(jeps) = colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment = as.character(hösh[2][1,])
      Mediator = as.character(hösh[1][1,])
      
      # Clean up Mediator names
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      if (Mediator == "T-Epi-T") {
        Mediator[Mediator == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      
      # Set up directory for saving plots
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//", fn, Group, "/"), collapse = "")
      sub_dir <- Treatment
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      
      # Save plots as JPEG files
      if (Mediator != "T/Epi-T") {
        jpeg(paste("Correlations plotted", Group, Treatment2, Mediator, ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      } else {
        jpeg(paste("Correlations plotted_alle", Treatment2, 'T.Epi.T', ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      }
      
      # Plot correlations using ggplot2
      xcx = jeps[, Treatment]
      ycy = jeps[, Mediator]
      väh = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - väh, hjust = 0, vjust = 0, label = paste0("p = ", round(p.val, 5))) +
        theme_classic()
      
      print(a)
      dev.off()
    }
  }
  
  # Repeat the process for different combinations of xnam and y
  xnam <- colnames(SG0)[c(2)]
  y <- TreatmentN
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      hösh = hopiu[hopiu[, 1] == y[j] & hopiu[, 2] == xnam[i] & hopiu[, 3] == Group,]
      jeps = SG0
      r = as.numeric(hösh[4][1,])
      p.val = as.numeric(hösh[5][1,])
      rsadj = as.numeric(hösh[6][1,])
      colnames(jeps) = colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment = as.character(hösh[2][1,])
      Mediator = as.character(hösh[1][1,])
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      if (Mediator == "T-Epi-T") {
        Mediator[Mediator == "T-Epi-T"] = "T/Epi-T"
      }
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//", fn, Group, "/"), collapse = "")
      sub_dir <- Treatment
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (Mediator != "T/Epi-T") {
        jpeg(paste("Correlations plotted", Group, Treatment2, Mediator, ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      } else {
        jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      }
      xcx = jeps[, Treatment]
      ycy = jeps[, Mediator]
      väh = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 4))) +
        annotate("text", x = min(xcx), y = max(ycy) - väh, hjust = 0, vjust = 0, label = paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
      hesh = rbind(hesh, c(y[j], xnam[i], Group, r, p.val, rsadj))
    }
  }
  
  # Repeat the process for different combinations of xnam and y
  xnam <- colnames(SG0)[c(3)]
  y <- TreatmentN
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      hösh = hopiu[hopiu[, 1] == y[j] & hopiu[, 2] == xnam[i] & hopiu[, 3] == Group,]
      jeps = SG0
      r = as.numeric(hösh[4][1,])
      p.val = as.numeric(hösh[5][1,])
      rsadj = as.numeric(hösh[6][1,])
      colnames(jeps) = colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment = as.character(hösh[2][1,])
      Mediator = as.character(hösh[1][1,])
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      if (Mediator == "T-Epi-T") {
        Mediator[Mediator == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//", fn, Group, "/"), collapse = "")
      
      sub_dir <- Treatment
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (Mediator != "T/Epi-T") {
        jpeg(paste("Correlations plotted_alle", Treatment2, Mediator, ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      } else {
        jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      }
      
      xcx = jeps[, Treatment]
      ycy = jeps[, Mediator]
      väh = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - väh, hjust = 0, vjust = 0, label = paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
    }
  }
  
  # Repeat the process for different combinations of xnam and y
  xnam <- TreatmentN
  y = colnames(SG0[, 8:27])
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      hösh = hopiu[hopiu[, 1] == y[j] & hopiu[, 2] == xnam[i] & hopiu[, 3] == Group,]
      jeps = SG0
      r = as.numeric(hösh[4][1,])
      p.val = as.numeric(hösh[5][1,])
      rsadj = as.numeric(hösh[6][1,])
      colnames(jeps) = colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment = as.character(hösh[2][1,])
      Mediator = as.character(hösh[1][1,])
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      if (Mediator == "T-Epi-T") {
        Mediator[Mediator == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//", fn, Group, "/"), collapse = "")
      sub_dir <- Treatment
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (Mediator != "T/Epi-T") {
        jpeg(paste("Correlations plotted", Group, Treatment2, Mediator, ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      } else {
        jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      }
      xcx = jeps[, Treatment]
      ycy = jeps[, Mediator]
      väh = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 4))) +
        annotate("text", x = min(xcx), y = max(ycy) - väh, hjust = 0, vjust = 0, label = paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
    }
  }
  
  # Additional analysis for specific combinations of xnam and y
  xnam <- c(x3, x6)
  y <- c(colnames(SG0[, 8:27]))
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      hösh = hopiu[hopiu[, 1] == y[j] & hopiu[, 2] == xnam[i] & hopiu[, 3] == Group,]
      jeps = SG0
      r = as.numeric(hösh[4][1,])
      p.val = as.numeric(hösh[5][1,])
      rsadj = as.numeric(hösh[6][1,])
      colnames(jeps) = colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment = as.character(hösh[2][1,])
      Mediator = as.character(hösh[1][1,])
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      if (Mediator == "T-Epi-T") {
        Mediator[Mediator == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//", fn, Group, "/"), collapse = "")
      sub_dir <- Treatment
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (Mediator != "T/Epi-T") {
        jpeg(paste("Correlations plotted", Group, Treatment2, Mediator, ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      } else {
        jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      }
      xcx = jeps[, Treatment]
      ycy = jeps[, Mediator]
      väh = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - väh, hjust = 0, vjust = 0, label = paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
      hesh = rbind(hesh, c(y[j], xnam[i], Group, r, p.val, rsadj))
    }
  }
  
  # Final analysis for specific combinations of xnam and y
  xnam <- c('AGE', 'BMI', colnames(SG0)[c(4:7)])
  y <- c(colnames(SG0[, 8:27]))
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      hösh = hopiu[hopiu[, 1] == y[j] & hopiu[, 2] == xnam[i] & hopiu[, 3] == Group,]
      jeps = SG0
      r = as.numeric(hösh[4][1,])
      p.val = as.numeric(hösh[5][1,])
      rsadj = as.numeric(hösh[6][1,])
      colnames(jeps) = colnames(tv_all)[2:dim(tv_all)[2]]
      Treatment = as.character(hösh[2][1,])
      Mediator = as.character(hösh[1][1,])
      Mediator <- gsub("\\.", "-", Mediator)
      Mediator <- gsub("X", "", Mediator)
      if (Mediator == "T-Epi-T") {
        Mediator[Mediator == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = Treatment
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//", fn, Group, "/"), collapse = "")
      sub_dir <- Treatment
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (Mediator != "T/Epi-T") {
        jpeg(paste("Correlations plotted", Group, Treatment2, Mediator, ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      } else {
        jpeg(paste("Correlations plotted_", Treatment2, 'T.Epi.T', ".jpg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
      }
      xcx = jeps[, Treatment]
      ycy = jeps[, Mediator]
      väh = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(Treatment) +
        ylab(Mediator) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - väh, hjust = 0, vjust = 0, label = paste0("p = ", round(p.val, 5))) +
        theme_classic()
      print(a)
      dev.off()
      hesh = rbind(hesh, c(y[j], xnam[i], Group, r, p.val, rsadj))
    }
  }
  
  # Convert results to a data frame and return
  hesa = hesh
  hoi = as.data.frame(hesh)
  main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//", fn), collapse = "")
  setwd(main_dir)
  
  return(list(hopiu))
}

huus2=function(tv_all,hopiu) {
  huus=c();heijaa=c('All','female','male'); 
  fn='metabnorm//covScaled//non_fdr//'#
  for (hrt in 1:3) {
    huus=append(huus,the_fun_figs(tv_all,heijaa[hrt],hopiu,aa,bb,fn))}
  return(huus)}


# Now I'll be just showing some examples, but this works and if you drive this, you can find all the combos in the folders.
# E.g.: ".../R/metabnorm/covScaled/non_fdr/'Subject'/Hexcer/Correlations plotted All Hexcer S .jpg" 
# fyi: do the 'subject' folders (all, male, fem.) separately
aa=-0.5; bb=0.5; adj='nook'; sig.level=c(.001,0.01, 0.05)
fn='metabnorm//covScaled//non_fdr//'#
metanorm_S_non_fdr=huus(tv_covscl,adj,sig.level) # This was already done above
metanorm_S_fdr_non_tot=ldply(metanorm_S_non_fdr, data.frame) #Making the above ok for the next:
non_fdr_check=huus2(tv_covscl,metanorm_S_fdr_non_tot) # This plots all the combinations, so it takes some time

#The above code in this segment does not get easily better with Copilot, but I let it comment it a bit.

#Making some 3d plots:

# Load the dataset
data <- tv_covscl

# Plot a 3D scatter plot for PFOA, T/Epi-T, and PE
plot3d(
  y = data[,'PFOA'],
  x = data[,'T/Epi-T'],
  z = data[,'PE'],
  col = 'black',
  type = 's',
  radius = .2,
  ylab = "PFHxS",
  xlab = "T/Epi-T",
  zlab = "PE"
)

# Fit a linear model for PFHxS based on T/Epi-T and PE
my.lm <- lm(data[,'PFHxS'] ~ data[,'T/Epi-T'] + data[,'PE'])

# Extract the variables for the scatter plot
x <- data[,'T/Epi-T']
y <- data[,'PFHxS']
z <- data[,'PE']

# Create a 3D scatter plot with the fitted plane
s3d <- scatterplot3d(
  x, y, z, pch = 20, mar = c(5, 3, 4, 3),
  main = "3D Scatter Plot of Compounds and Their Residuals",
  angle = 55, scale.y = 0.5,
  xlab = "T/Epi-T",
  ylab = "PFHxS",
  zlab = "PE"
)
s3d$plane3d(my.lm, lty = "dotted")

# Convert coordinates for the original points and the fitted plane
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))

# Determine the color and line type for the residuals
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])

# Load the dataset again
data <- tv_covscl

# Plot a 3D scatter plot for PFOA, AN, and PC_O
plot3d(
  y = data[,'PFOA'],
  x = data[,'AN'],
  z = data[,'PC_O'],
  col = 'black',
  type = 's',
  radius = .2,
  ylab = "PFOA",
  xlab = "AN",
  zlab = "PC_O"
)

# Fit a linear model for PFOA based on AN and PC_O
my.lm <- lm(data[,'PFOA'] ~ data[,'AN'] + data[,'PC_O'])

# Extract the variables for the scatter plot
y <- data[,'PFOA']
x <- data[,'AN']
z <- data[,'PC_O']

# Create a 3D scatter plot with the fitted plane
s3d <- scatterplot3d(
  x, y, z, pch = 20, mar = c(5, 3, 4, 3),
  main = "3D Scatter Plot of Compounds and Their Residuals",
  angle = 55, scale.y = 0.5,
  xlab = "AN",
  ylab = "PFOA",
  zlab = "PC_O"
)
s3d$plane3d(my.lm, lty = "dotted")

# Convert coordinates for the original points and the fitted plane
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))

# Determine the color and line type for the residuals
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])

# Load the dataset again
data <- tv_covscl

# Plot a 3D scatter plot for PFHxA, DHEA, and PC
plot3d(
  y = data[,'PFHxA'],
  x = data[,'DHEA'],
  z = data[,'PC_O'],
  col = 'black',
  type = 's',
  radius = .2,
  ylab = "PFHxA",
  xlab = "DHEA",
  zlab = "PC"
)

# Fit a linear model for PFHxA based on DHEA and PC
my.lm <- lm(data[,'PFHxA'] ~ data[,'DHEA'] + data[,'PC'])

# Extract the variables for the scatter plot
y <- data[,'PFHxA']
x <- data[,'DHEA']
z <- data[,'PC']

# Create a 3D scatter plot with the fitted plane
s3d <- scatterplot3d(
  x, y, z, pch = 20, mar = c(5, 3, 4, 3),
  main = "3D Scatter Plot of Compounds and Their Residuals",
  angle = 55, scale.y = 0.5,
  xlab = "DHEA",
  ylab = "PFHxA",
  zlab = "PC"
)
s3d$plane3d(my.lm, lty = "dotted")

# Convert coordinates for the original points and the fitted plane
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))

# Determine the color and line type for the residuals
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])
Correlations Scatter Plotted_All_Hexcer S

Correlations Scatter Plotted_All_Hexcer S

Correlations Scatter Plotted_Female_Hexcer S

Correlations Scatter Plotted_Female_Hexcer S

Correlations Scatter Plotted_Male_Hexcer S

Correlations Scatter Plotted_Male_Hexcer S

Correlations Scatter Plotted_All_C4_L P5

Correlations Scatter Plotted_All_C4_L P5

Correlations Scatter Plotted_Female_C4_L P5

Correlations Scatter Plotted_Female_C4_L P5

Correlations Scatter Plotted_Male_C4_L P5

Correlations Scatter Plotted_Male_C4_L P5

Making Network Plot

# Following the previous work for doing the network plot (https://r-graph-gallery.com/network.html)

# Here the main data of the plot takes correlations:
# Remove specific columns from tv_c
tv_c=tv_c[,!colnames(tv_c) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]; 
# Remove columns specified in x4 from tv_c
tv_c=tv_c[,!colnames(tv_c) %in% x4] 
# Rename column '17aOH-P4' to '17a-OHP4'
colnames(tv_c)[colnames(tv_c)=="17aOH-P4"]="17a-OHP4" 
dat = tv_c; 
# Remove 'Gender' and 'PatientNumber' columns from dat
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] 
# Calculate Spearman correlation matrix
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r 
# Set threshold level for connections between vertices (pampulat)
n_level=0.9 
# Calculate Nrr and replace NA values with 1
Nrr=qpNrr(resulta, verbose=FALSE);Nrr[is.na(Nrr)]=1;
# Create a condition matrix based on n_level threshold
cond=data.frame(as.matrix(Nrr<n_level)) 
# Elementwise matrix multiplication and update column names to match row names # https://www.geeksforgeeks.org/elementwise-matrix-multiplication-in-r/
RN=data.frame(resulta);tes_t=cond*RN;tes_t=as.matrix(tes_t);resulta=tes_t;colnames(resulta)=rownames(resulta) 
tes_t=resulta
# Calculate lengths of x1 to x6 and assign to variables a to f
a=length(x1)-2;b=length(x2);c=length(x3);d=length(x4);e=length(x5);f=length(x6); 

# Removing self-correlation
tes_t[1:a,1:a]=0
tes_t[(a+1):(a+b),(a+1):(a+b)]=0
tes_t[(a+b+1):(a+b+c),(a+b+1):(a+b+c)]=0
tes_t[(a+b+c+1):(a+b+c+e),(a+b+c+1):(a+b+c+e)]=0
tes_t[(a+b+c+e+1):(a+b+c+e+f),(a+b+c+e+1):(a+b+c+e+f)]=0

# Select a subset of tes_t based on column names
tes_t = tes_t[colnames(tes_t)[7:66], colnames(tes_t)[7:66]]

# Create a graph from adjacency matrix
g <- graph_from_adjacency_matrix(tes_t, mode="upper", weighted=TRUE, diag=FALSE)

# Convert graph to edge list and create a data frame with weights
e <- as_edgelist(g)
df <- as.data.frame(cbind(e, E(g)$weight))

# Convert the third column to numeric
df[, 3] = as.numeric(df[, 3])
hoi=df 
hoi=hoi[!duplicated(hoi[,c(1,2)]),] # Remove duplicate rows based on the first two columns

# Load igraph library if needed
# https://r-graph-gallery.com/249-igraph-network-map-a-color.html
# Create a data frame for links with source, target, and importance
links <- data.frame(
  source=c("A","A", "A", "A", "A","J", "B", "B", "C", "C", "D","I"),
  target=c("B","B", "C", "D", "J","A","E", "F", "G", "H", "I","I"),
  importance=(sample(1:4, 12, replace=T))) 
# Set column names of hoi to match links
colnames(hoi)=colnames(links) 
links=hoi
# Get distinct sources and rename column to 'label'
sources=hoi %>% distinct(source) %>% rename(source='label')
# Get distinct targets and rename column to 'label'
destinations=hoi %>% distinct(target) %>% rename(target ='label') 
# Merge sources and destinations into nodess
nodess <- full_join(sources, destinations, by = "label") 

# The names of the variable groups, such as xc for contaminants
xc=x5[x5 %in% nodess$label]
xb=x3[x3 %in% nodess$label]
xl=x6[x6 %in% nodess$label]
x2[x2 =='17aOH-P4']='17a-OHP4' #Next time check these wrong names early on... :)
xs=x2[x2 %in% nodess$label]
nodess$label=c(xc,xb,xl,xs)

# Create a data frame 'nodes' with two columns: 'name' and 'carac'
# 'name' is taken from the first column of 'nodess'
# 'carac' is a categorical variable indicating the type of each node
nodes <- data.frame(name=nodess[,1], 
                    carac=( c(rep("Contaminants",length(xc)),rep("Bile Acids",length(xb)),rep("Lipids",length(xl)),rep("Steroids",length(xs))))) #range on kaikki +1

# Convert the data frame 'links' and 'nodes' into an igraph object
network <- graph_from_data_frame(d=links, vertices=nodes, directed=F) 

# Load necessary libraries for color palettes and graphics (library(RColorBrewer), library(ragg)) if needed

# Set the font to 'Calibri (Body)'
windowsFonts(A = windowsFont("Calibri (Body)"))

# Define a palette of 4 colors
coul  <- c('#B2BEB5','Green','Red','Orange') 

# Create a vector of colors corresponding to the 'carac' variable in the network
my_color <- coul[as.numeric(as.factor(V(network)$carac))]

# Save the plot as a JPEG file with specified dimensions and quality
jpeg('network.jpg', width=4, height=4.7, units="in", quality=100, pointsize=7, res=1000)

# Plot the network with specified parameters
plot(network, mode = "circle", vertex.color=my_color, vertex.size = 10,
     edge.arrow.size = 0.8, vertex.label.cex = 0.35, edge.width=as.numeric(E(network)$importance)*6.00 )

# Add a legend to the plot
legend("topright", legend=levels(as.factor(V(network)$carac)),
       col = coul, bty = "n", pch=20, pt.cex = 1.3, cex = 1.3, text.col=coul,
       horiz = FALSE, inset = c(0.65, 0.8))

# Close the JPEG device
dev.off()

# Read the saved JPEG image
eoh='network.jpg'
my_image <- image_read(eoh)

# Convert the image to SVG format and save it
my_svg <- image_convert(my_image, format="svg")
image_write(my_svg, paste(eoh,".svg"))

# Display the figure and convert it to PDF
knitr::include_graphics(eoh)
daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))

# Copilot helped here with the commenting.

Making Causal Mediation Analysis

# The basic hypothesis. All are variables (y~x+m;m~x)
loop_med_simplified1a = function(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group) {
  # Determine the condition based on the group
  if (Group == 'female') {
    cond = tv_all[,'Gender'] == min(tv_all[,'Gender'])
  } else if (Group == 'male') {
    cond = tv_all[,'Gender'] == max(tv_all[,'Gender'])
  } else if (Group == 'All') {
    cond = rep(TRUE, dim(tv_all)[1])
  }
  
  # Filter the dataset based on the condition and sickness status
  tv_red = c()
  if (sick == 'yes') {
    tv_red = tv_all[cond & as.vector(sick_group),]
  } else {
    tv_red = tv_all[cond,]
  }
  
  # Extract the treatment, mediator, and outcome variables
  X <- tv_red[, Treatment]  # Standard values did not give errors
  M <- tv_red[, Mediator]
  Y <- tv_red[, Outcome]  # "Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4", "Necroinflammation", "HOMA-IR"
  
  # Combine the variables into a single data frame
  Data <- cbind(X, M, Y)
  colnames(Data) <- gsub(" ", "_", colnames(Data))
  Data = data.frame(Data)
  
  # Define control and treatment values for the mediation analysis
  control.value = colMins(as.matrix(X))  # Test also with colMedians
  treat.value = colMaxs(as.matrix(X))
  
  # Initialize vectors for storing variable names
  x = c()
  m = c()
  y = c()
  
  # Create variable names for the treatment, mediator, and outcome
  for (i in 1:length(colnames(X))) {
    x = append(x, paste("Data[, ", i , "]", sep = ""))
  }
  for (j in (dim(X)[2] + 1):(length(colnames(M)) + dim(X)[2])) {
    m = append(m, paste("Data[, ", j , "]", sep = ""))
  }
  for (z in (dim(M)[2] + dim(X)[2] + 1):(dim(Data)[2])) {
    y = append(y, paste("Data[, ", z , "]", sep = ""))
  }
  
  # Initialize vectors for storing results
  med_out = c()
  res = c()
  tmp = c()
  rn = c()
  i=1;j=1;z=1
  
  # Loop through each combination of outcome, mediator, and treatment
  for (i in 1:length(y)) {
    for (j in 1:length(m)) {
      for (z in 1:length(x)) {
        # Define the formula for the mediator model (M ~ X)
        fmla1 <- as.formula(paste(paste(m[j], collapse = "+"), " ~ ", paste(x[z], collapse = "+")))
        b = lm(fmla1, Data)
        
        # Define the formula for the outcome model (Y ~ M + X)
        xm = paste(paste(c(x[z], m[j]), collapse = "+"))
        fmla2 <- as.formula(paste(y[i], " ~ ", xm))
        c = lm(fmla2, Data)
        
        # Perform the mediation analysis
        if (t.val == 'no') {
          med_oute = mediation::mediate(b, c, treat = x[z], mediator = m[j], sims = simss)
        } else if (t.val == 'yes') {
          med_oute = mediation::mediate(b, c, treat = x[z], mediator = m[j], sims = simss, control.value = control.value[z], treat.value = X[test, z])
        } else if (t.val == 'minmax') {
          med_oute = mediation::mediate(b, c, treat = x[z], mediator = m[j], sims = simss, control.value = control.value[z], treat.value = treat.value[z])
        }
        
        # Summarize the mediation analysis results
        med_out = summary(med_oute)
        tmp = c(med_out$d0, med_out$d0.p, med_out$d0.ci[1], med_out$d0.ci[2], med_out$z0, med_out$z0.p, med_out$z0.ci[1], med_out$z0.ci[2], med_out$n1, med_out$n1.p, med_out$n1.ci[1], med_out$n1.ci[2], med_out$tau.coef, med_out$tau.p, med_out$tau.ci[1], med_out$tau.ci[2])
        res <- rbind(res, tmp)
        rn = append(rn, paste(colnames(X)[z], colnames(M)[j], colnames(Y)[i], sep = " "))
        remove(tmp)
      }
    }
  }
  
  # Assign row and column names to the results
  rownames(res) = rn
  colnames(res) = c('ACME', 'd0.p', 'd0.ci_l', 'd0.ci_u', 'ADE', 'z0.p', 'z0.ci_l', 'z0.ci_u', 'Proportion Mediated', 'n1.p', 'n.ci_l', 'n1.ci_u', 'Total Effect', 'tau.p', 'tau.ci_l', 'tau.ci_u')
  
  # Order the results by p-value
  res = res[order(res[, 2]),]
  
  # Clean up row names
  rownames(res) <- gsub("X11", "11", rownames(res))
  rownames(res) <- gsub("X17", "17", rownames(res))
  
  # Write the results to an Excel file
  write.xlsx(res, file = paste(name, Group, date, '.xlsx'), append = FALSE, row.names = TRUE)
  
  return(res)
}

# Testing hypothesis one:
the_essentials = function(Treatment, Mediator, Outcome, tv, Group, name, simss, t.val, test, sick, sick_group, fn, lkm, date, joo, ip) {
  # Set the working directory to the specified path
  hoi1 = paste("C:/Users/patati/Desktop/Turku/R/basic causal mediation with the counterfactuals/", fn, sep = '')
  setwd(hoi1)  # Set the working directory
  
  # Define the name for the output files
  name = paste(simss, 'basic_divisions')
  
  # Perform mediation analysis for all groups
  Group = 'All'
  uh7ma = loop_med_simplified1a(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group)
  try({uh7ma}, {uh7ma = data.frame(0)})  # Handle errors gracefully
  save(uh7ma, file = paste(fn, 'all.RData'))  # Save the results
  
  # Perform mediation analysis for female group
  Group = 'female'
  uh7f = loop_med_simplified1a(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group)
  try({uh7f}, {uh7f = data.frame(0)})  # Handle errors gracefully
  save(uh7f, file = paste(fn, 'female.RData'))  # Save the results
  
  # Perform mediation analysis for male group
  Group = 'male'
  uh7m = loop_med_simplified1a(Treatment, Mediator, Outcome, tv_all, Group, name, simss, t.val, test, sick, sick_group)
  try({uh7m}, {uh7m = data.frame(0)})  # Handle errors gracefully
  save(uh7m, file = paste(fn, 'male.RData'))  # Save the results
}

# Set the number of simulations and other parameters
simss = 100
name = 'Jaot_OK_basica'
joo = 'ei'
ip = 1

# Extract specific columns from the dataset for analysis
ccova = tv[, c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4", "Necroinflammation", "HOMA-IR")]

# Define a group based on a condition (sum of specific columns > 4)
sick_group = rowSums(ccova) > 4

# Define file names for different outcomes
file_names = c("Steatosis", "Fibrosis", "Necroinflammation", "HOMAIR", 'Menopause')

# Set additional parameters
lkm = 30
joo = 'ei'
ip = 1
sick = 'yes'
t.val = 'no'
Group = 'All'
# date='houdeesh'

# Set parameters for the 'All' group analysis
joo = 'joo'
sick = 'no'

# Extract specific columns from the dataset for analysis
ccova = tv[, c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4", "Necroinflammation", "HOMA-IR")]

# Define the file name and group based on a condition (sum of specific columns > 4)
fn = 'All'
sick_group = rowSums(ccova) > 4

# Driving the analysis takes more than 1h (with a basic laptop 2023,100sims and all three groups (all, female and male)) so drive the below only if needed:
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick, sick_group,fn,lkm,date,joo,ip)
# setwd("C:/Users/patati/Desktop/Turku/R/basic causal mediation with the counterfactuals/") #check this if needed...

# Error in mediate(b, c, treat = x[z], mediator = m[j], sims = simss) : 
  # unused arguments (treat = x[z], mediator = m[j], sims = simss) ...

# In case, the 'case' sample/subject analysis would be needed: 
# Steatosis
# ccovae=tv[,c("Steatosis.Grade.0.To.3")]; sick_group=ccovae>0 #toth# # hist(ccovae,breaks=100) # hist(ccova[,'HOMA-IR'],breaks=100)
# fn=file_names[1];
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# #Fibrosis
# ccovae=tv[,c("Fibrosis.Stage.0.to.4")]; 
# sick_group=ccovae>0 #toth#
# fn=file_names[2]; 
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# #Necroinfl.
# ccovae=tv[,c("Necroinflammation")]; sick_group=ccovae>0 #toth#
# fn=file_names[3];
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# Homa # remember to always test the function with minimun number setting or as light parameters as possible to get it through... before big runs
# joo='ei';sick='yes'
# ccovae=tv[,c("HOMA-IR")]; sick_group=ccovae>1.5 #toth#
# fn=file_names[4]; 
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)

# In addition, then there is also possibility for elaborations, i.e. the adjustments, of the models, 
# but I'll leave it from here for the time being... (please do ask if needed: patati 'tilde' utu dot fi) :)
# fyi: https://bookdown.org/content/b472c7b3-ede5-40f0-9677-75c3704c7e5c/more-than-one-mediator.html



#Some commenting with Copilot here.

Making Heatmaps for Indirect and Direct Effects

# setwd("C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/") #check this if needed...
library(readxl)
all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic All tikka3624 .xlsx") # #_1
# all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/hypo_basic/100 hypo_b_no_not sick All tikka221024 .xlsx") # #_2 :)
all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all[,1]),];rownames(all_all)=all_all[,1]; all_all=all_all[,2:dim(all_all)[2]]; all_all=all_all[rev(order(all_all[,1])),]
all_all=all_all; #all_all=all_all[all_all[,1]>0,]
#https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
#https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
#https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
groups=GVZ
groups[,'Abbreviation'][groups[,'Abbreviation']=='17aOH-P4']='17a-OHP4'

#Switch = 0: PFAS vs steroids; switch=1: PFAS vs BAs and lipids, switch=2: steroids vs BAs and lipids (0-2 with both ACME and ADE (z='dir'))

houdees=function(hoi, rt2, switch, mn, z, corr, date, neg) {
  indir=c(); dir=c(); ip=c(); rn=c(); rn2=c()
  Outcome=colnames(tv_covNS)[c(29:51,59:71)]; # The final dataframe is shorter or the like so there were less variables here...
  Treatment=colnames(tv_covNS)[52:58];
  ## https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
  
  # Direct mediation analysis
  if (switch==1) {
    Mediator_ok=Outcome[Outcome %in% names(table(hoi[1:dim(hoi)[1],c(3)]))]
    for (i in 1:7) {
      for (j in 1:length(Mediator_ok)) {
        if (z=='dir') {
          ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ADE']))) {
            if (median(ap[,'ADE']) < quantile(rt2[,'ADE'],0.5)) {
              apa=min(ap[,'ADE']);
              ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']
            } else {
              apa=max(ap[,'ADE']);
              ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        } else {
          ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ACME']))) {
            if (median(ap[,'ACME']) < quantile(rt2[,'ACME'],0.5)) {
              apa=min(ap[,'ACME']);
              ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']
            } else {
              apa=max(ap[,'ACME']);
              ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        }
        rn=append(rn,hoi[,3][which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]]) # change this...
        rn2=append(rn2,hoi[,1][which(hoi[,1]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]])
        
        Matrix <- matrix(0, nrow = length(Treatment), ncol = length(Mediator_ok))
        myData <- data.frame(matrix = Matrix)
        colnames(myData) <- Mediator_ok; rownames(myData) <- Treatment
      }
    }
  } else if (switch==0) {
    # Indirect mediation analysis
    Mediator_ok=colnames(tv_all)[9:28][colnames(tv_all)[9:28] %in% names(table(hoi[1:dim(hoi)[1],c(2)]))]
    for (i in 1:7) {
      for (j in 1:length(Mediator_ok)) {
        if (z=='dir') {
          ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ADE']))) {
            if (median(ap[,'ADE']) < quantile(rt2[,'ADE'],0.5)) {
              apa=min(ap[,'ADE']);
              ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']
            } else {
              apa=max(ap[,'ADE']);
              ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        } else {
          ap=rt2[which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ACME']))) {
            if (quantile(ap[,'ACME'],0.50) < quantile(rt2[,'ACME'],0.5)) {
              apa=min(ap[,'ACME']);
              ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']
            } else {
              apa=max(ap[,'ACME']);
              ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        }
        rn=append(rn,hoi[,2][which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j])[1]]) # change this...
        rn2=append(rn2,hoi[,1][which(hoi[,1]==Treatment[i] & hoi[,2]==Mediator_ok[j])[1]])
        
        Matrix <- matrix(0, nrow = length(Treatment), ncol = length(Mediator_ok))
        myData <- data.frame(matrix = Matrix)
        colnames(myData) <- Mediator_ok; rownames(myData) <- Treatment
      }
    }
  } else if (switch==2) {
    Treatment=colnames(tv_all)[9:28]; # These names are a bit mixed, but the idea is ok.
    Mediator_ok=Outcome[Outcome %in% names(table(hoi[1:dim(hoi)[1],c(3)]))]
    
    for (i in 1:length(Treatment)) {
      for (j in 1:length(Mediator_ok)) {
        if (z=='dir') {
          ap=rt2[which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ADE']))) {
            if (median(ap[,'ADE']) < quantile(rt2[,'ADE'],0.5)) {
              apa=min(ap[,'ADE']);
              ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']
            } else {
              apa=max(ap[,'ADE']);
              ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        } else {
          ap=rt2[which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ACME']))) {
            if (median(ap[,'ACME']) < quantile(rt2[,'ACME'],0.5)) {
              apa=min(ap[,'ACME']);
              ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']
            } else {
              apa=max(ap[,'ACME']);
              ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        }
        rn=append(rn,hoi[,3][which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]]) # change this...
        rn2=append(rn2,hoi[,2][which(hoi[,2]==Treatment[i] & hoi[,3]==Mediator_ok[j])[1]])
        Matrix <- matrix(0, nrow = length(Treatment), ncol = length(Mediator_ok))
        myData <- data.frame(matrix = Matrix)
        colnames(myData) <- Mediator_ok; rownames(myData) <- Treatment
      }
    }
  } # You need three of these, yes :) ;print(ap[,'ADE']) print(rownames(ap[ap[,'ADE']==min(ap[,'ADE']),]))
  
  tot=cbind(rn2,rn,indir) # or indir or dir
  tot=tot[!is.na(tot[,1]),]
  tot=as.data.frame(tot)

  
  # Here you would need to have an evaluation for the 'else', if it is negative, it needs to be max negative, and vice versa for the
  # There does not seem to be doubles: table(tot[,1])
  
  tot[,3]=as.numeric(tot[,3])
  
  library(reshape2)
  jops=dcast(tot, rn2~rn, value.var='indir')
  jops[is.na(jops)]=0
  rownames(jops)=jops[,1]
  jops=jops[,2:dim(jops)[2]]
  jops=as.data.frame(jops)
  jopsr=matrix(as.numeric(unlist(jops)),nrow=dim(jops)[1],ncol=dim(jops)[2])
  colnames(jopsr)=colnames(jops); rownames(jopsr)=rownames(jops)
  
  # Ensure all rows and columns from myData are present in jopsr
  if (sum(!rownames(myData) %in% rownames(jopsr)) > 0) {
    to_df=rownames(myData)[!rownames(myData) %in% rownames(jopsr)]
    jopsr=rbind(jopsr, myData[to_df,]); jopsr=jopsr[rownames(myData),]
  }
  if (sum(!colnames(myData) %in% colnames(jopsr)) > 0) {
    to_df=colnames(myData)[!colnames(myData) %in% colnames(jopsr)]
    jopsr=cbind(jopsr, myData[,to_df]); jopsr=jopsr[,colnames(myData)]
  }
  
  tot=cbind(rn2, rn, ip)
  tot=tot[!is.na(tot[,1]),]
  tot=as.data.frame(tot)
  tot[,3]=as.numeric(tot[,3])
  
  jopsa=dcast(tot, rn2~rn, value.var='ip')
  jopsa[is.na(jopsa)]=0
  rownames(jopsa)=jopsa[,1]
  jopsa=jopsa[,2:dim(jopsa)[2]]
  jopsra=matrix(as.numeric(unlist(jopsa)),nrow=dim(jopsa)[1],ncol=dim(jopsa)[2])
  colnames(jopsra)=colnames(jopsa); rownames(jopsra)=rownames(jopsa)
  
  if (sum(!rownames(myData) %in% rownames(jopsra)) > 0) {
    to_df=rownames(myData)[!rownames(myData) %in% rownames(jopsra)]
    jopsra=rbind(jopsra, myData[to_df,]); jopsra=jopsra[rownames(myData),]
  }
  if (sum(!colnames(myData) %in% colnames(jopsra)) > 0) {
    to_df=colnames(myData)[!colnames(myData) %in% colnames(jopsra)]
    jopsra=cbind(jopsra, myData[,to_df]); jopsra=jopsra[,colnames(myData)]
  }
  
  if (switch==1) {
    # For direct effects
  } else if (switch==0) {
    # For indirect effects
    jopsra=jopsra[,groups[,'Abbreviation'][groups[,'Abbreviation'] %in% colnames(jopsra)]]
    jopsr=jopsr[,groups[,'Abbreviation'][groups[,'Abbreviation'] %in% colnames(jopsr)]]
  } else if (switch==2) {
    jopsra=jopsra[groups[,'Abbreviation'],]
    jopsr=jopsr[groups[,'Abbreviation'],]
  }
  
    # With the neg. you do not put these:
  if (dim(resulta1)[2]==36) {
  Outcome=colnames(tv_covNS)[c(29:51,59:71)];
  Outcome=Outcome[c(1,23,2:22,24:length(Outcome))]
  resulta1=resulta1[,Outcome[Outcome %in% colnames(resulta1) ]];p.mat.a1=p.mat.a1[,Outcome[Outcome %in% colnames(p.mat.a1) ]] }
  for (i in 1:dim(resulta1)[1]) {for (j in 1:dim(resulta1)[2]) {if (resulta1[i,j]==0) {p.mat.a1[i,j]=0.5}}} 
  # resulta1 <- t(resulta1);
  # p.mat.a1 <- t(p.mat.a1)
  # ou=round(min(c(abs(max(resulta1)),abs(min(resulta1)))),2)
  # op=ou-0.01
  heps=abs(round((min(resulta1)),1)); hepsa=abs(round((max(resulta1)),1))
  heps2=abs(round((max(resulta1)-0.01),3));heps3=abs(round((min(resulta1)+0.01),3));
  
  if (hepsa-heps >=0 ) {resulta1[resulta1 >= heps2] = hepsa; resulta1[resulta1 <= -heps3] = -hepsa; heps=hepsa} else 
  if (hepsa-heps < 0 ) {resulta1[resulta1 <= -heps] = -heps; resulta1[resulta1 >= heps2] = heps}  
  
  path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path) #check this if needed...
  jpeg(paste("Heatmap of high",date, mn,neg,".jpg"), width = width, height = height, quality = 100,pointsize = 14, res=300);# par( ps=ps)# par(cex.lab=90) 22 18
  # col = brewer.pal(n = 9, name = "YlOrRd")
  order="original"; range='orig';corre='no_renormaa'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
  cl.offset=20;cl.length=7;cl.cex = 1.45;pch.cex=2.45;pch=2;cl.pos = 'r'; #cl.offset=2;cl.length=5;cl.cex = 1.3;pch.cex=1.95;pch=14;
  
  col=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)
  if (neg=='no') {col=colorRampPalette(c( 'white','orange'), alpha = TRUE)(150)} else if (neg=='yes')
  {col=colorRampPalette(c('blue', 'white'), alpha = TRUE)(150)} else {col=col}
  
  # if (corr==TRUE) {if (min(as.matrix(resulta1))< -1  | max(as.matrix(resulta1))> 1) {resulta1=rango(resulta1,-1,1)}} else if (min(as.matrix(resulta1)) >= 0)  {resulta1=rango(resulta1,-1,1)} #
  # resulta1=rango(resulta1,-1,1)
  # if (min(as.matrix(resulta1)) >= 0  | max(as.matrix(resulta1)) <= 0) {resulta1=rango(resulta1,-1,1)}
  
  corrplot(as.matrix(resulta1), type = type, order = order,method=method, p.mat=as.matrix(p.mat.a1), tl.col = "black", 
           cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,#pitikö vain pch lisätä pch väriin väriin..'#FEE12B'
           sig.level = c(0.05),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length, #.001, .05, .2
           tl.srt = 90, diag = TRUE,col=col,is.corr = corr,col.lim=c(-heps,heps)) 
  #only in age...0.001,col.lim=c(-heps,heps) #rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]) col.lim=c(-1,1) col.lim=c(-heps,heps)
  #non were significant in neg... but after mody yes!
  
  dev.off(); eoh=paste("Heatmap of high",date, mn,neg,".jpg"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
  my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
  
  return(list(resulta1, p.mat.a1))
}

# Switch = 0: PFAS vs steroids; 
# Switch = 1: PFAS vs BAs and lipids, 
# Switch = 2: steroids vs BAs and lipids (0-2 with both ACME and ADE (z='dir'))
# corr=TRUE;z='idir' # uliulie2=houdees(hoi, switch=1,mn='indiruush',z,corr,date); dim(uliulie2[[1]]) 

# #Let's get this comparable done:
# First the 'matrisse'
corr=FALSE; z='dir'; switch=2 
u3=all_all; c1=c(); # mn='basicas' #
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
rt2=c1[complete.cases(c1), ] # 0.49 is optimal p value cutoff to get dim(mat[[2]])[2] as 36
hoi = c(); hoi=scan(text=rownames(rt2), what="") # scan(text=rownames(rt2), what="")
hoi = matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids') # ,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
mn=paste0(z,'ade_s_vs_bal_no order3b'); neg='else'
mat=houdees(hoi, rt2, switch, mn, z, corr, date, neg); # dim(mat[[2]])[2] # Indirect effect, kasvata... dim(mat[[2]])[2] == 36

corr=FALSE; z='dir'; switch=1; 
u3=all_all;
c1=c() #
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
c1=c1[rev(order(c1[,'ADE'])),]; #
c1=c1[c1[,'z0.p'] < 0.89, ] # 0.98/0.81/0.45/0.53/0.1... but gives 33 columns so use the other then..
# c1=c1[c1[,'d0.p'] < 0.50, ]
# let us start with the above optima... 643/or similar is dim(c1)[1] so need higher p to get all; (dim(c1)[1]-1)
# Check if this needs to be ADE instead...
c3=c1[c1[,'ADE'] < 0,] # -0.01,]#quantile(c1[,'ACME'])[2]
c1=c3;
rt2=c1[complete.cases(c1), ]
hoi=c(); hoi=scan(text=rownames(rt2), what="") # scan(text=rownames(rt2), what="")
hoi=matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids') # ,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
# mat_neg=list(c(1,2),c(3,4))
mn=paste0(z,'neg'); neg='yes'
mat_neg=houdees(hoi, rt2, switch, mn, z, corr, date, neg); 
# dim(mat_neg[[2]])[2]  # kasvata n, jotta dim(mat_pos[[2]])[2] yhtäkuin kuin length(c(x3,x6)), i.e. 36

u3=all_all; c1=c(); # mn='posae'
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
c1=c1[rev(order(c1[,'ACME'])),]; # Check acmes and ades
c1=c1[c1[,'z0.p']<0.89, ]
c2=c1[c1[,'ADE'] >= 0,]
c1=c2; # 
# c1= c1[sample(1:nrow(c1)), ];
rt2=c1[complete.cases(c1), ] # rt2[1,]=rt2[1,]
hoi=c(); hoi=scan(text=rownames(rt2), what="") # scan(text=rownames(rt2), what="")
hoi=matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids') # ,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
mn=paste0(z,'posa2aa'); neg='no'
mat_pos=houdees(hoi, rt2, switch, mn, z, corr, date, neg='no');

the_real=c(); the_real <- matrix(0, nrow = dim(mat[[1]])[1], ncol = dim(mat[[1]])[2]); the_real <- data.frame(the_real)
colnames(the_real) <- colnames(mat[[1]]); rownames(the_real) <- rownames(mat[[1]])
the_real2=c(); the_real2 <- matrix(0, nrow = dim(mat[[1]])[1], ncol = dim(mat[[1]])[2]); the_real2 <- data.frame(the_real2)
colnames(the_real2) <- colnames(mat[[1]]); rownames(the_real2) <- rownames(mat[[1]])

m1=mat[[1]]; m2=mat_pos[[1]]; m3=mat_neg[[1]]; m4=mat[[2]]; m5=mat_pos[[2]]; m6=mat_neg[[2]]
# m3=m3[,Outcome[Outcome %in% colnames(m3) ]]; m6=m6[,Outcome[Outcome %in% colnames(m6) ]]

# or... new logic:
the_real=m1; the_real2=m4
# for (i in rownames(m2)) {for (j in colnames(m2)) {if (the_real2[i,j] > 0 ) {the_real[i,j]=m1[i,j]; the_real2[i,j]=m4[i,j]}}}
the_real[the_real >= 0 & the_real < 0.03] = m2[the_real >= 0 & the_real < 0.03]
the_real2[the_real >= 0 & the_real < 0.03] = m5[the_real >= 0 & the_real < 0.03]
the_real[the_real < 0 & the_real > -0.04] = m3[the_real < 0 & the_real > -0.04]
the_real2[the_real < 0 & the_real > -0.04] = m6[the_real < 0 & the_real > -0.04]

# resulta1_big_id=mg # ma
# p.mat.a1_big_id=mgg # maa
# resulta1_big_id=ma
# p.mat.a1_big_id=maa
resulta1_big_id=the_real
p.mat.a1_big_id=the_real2
# resulta1_big_id=m1
# p.mat.a1_big_id=m4

resulta1 <- as.matrix(resulta1_big_id); p.mat.a1 <- as.matrix(p.mat.a1_big_id)
resulta1[is.na(resulta1)]=0

# With the neg. you do not put these:
if (dim(resulta1)[2]==36) {
  Outcome=colnames(tv_covNS)[c(29:51,59:71)];
  Outcome=Outcome[c(1,23,2:22,24:length(Outcome))]
  resulta1=resulta1[,Outcome[Outcome %in% colnames(resulta1) ]]; p.mat.a1=p.mat.a1[,Outcome[Outcome %in% colnames(p.mat.a1) ]] 
}
for (i in 1:dim(resulta1)[1]) {for (j in 1:dim(resulta1)[2]) {if (resulta1[i,j]==0) {p.mat.a1[i,j]=0.5}}}

# In addition, you would need:
heps=abs(round((min(resulta1)),1))
heps2=abs(round((min(resulta1)+0.01),2))
resulta1[resulta1 > heps] = heps
resulta1[resulta1 < -heps2] = -heps

# resulta1[resulta1 < 0] = 0 
# so this extends the minimum value to the (max) min limit so as to get the true blue to be seen, at least once
path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path) # check this if needed...
if (dim(resulta1)[1]==7) {width = 4000; height=1500} else if (dim(resulta1)[1]==20) {width = 4000; height=2500} else if (dim(resulta1)[1]==36) {width = 2500; height=5000}
jpeg(paste("Heatmap of highed_the_real_ade_mat",z,date,".jpg"), width = width, height = height, quality = 100, pointsize = 14, res=300);
col=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)

order="original"; range='orig'; corre='no_renormaa'; type='full'; method='color'; ga='All'; gf='Female'; gm='Male' # color square
cl.offset=25; cl.length=7; cl.cex = 1.1; pch.cex=1.95; pch=3; cl.pos = 'r'; 
# cl.offset=2; cl.length=5; cl.cex = 1.3; pch.cex=1.95; pch=14;

corrplot(resulta1, type = type, order = order, method=method, p.mat=p.mat.a1, tl.col = "black", # sum(COL2('RdBu')=="#FF7417")
         cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black', pch=pch, # pitikö vain pch lisätä pch väriin väriin...'#FEE12B'
         sig.level = c(0.05), cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset, cl.length=cl.length,
         tl.srt = 90, diag = TRUE, col=col, is.corr = FALSE, col.lim=c(-heps, heps)) # only in age...0.001, -2,2 

dev.off(); eoh=paste("Heatmap of highed_the_real_ade_mat",z,date,".jpg"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
## png 
##   2
my_image <- image_read(eoh); my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path)
  


#Copilot commenting worked for houdees function and some of the drives.
knitr::include_graphics('Square Correlation Plot of_korkeatACME PFAS vs. steroids_ for the hypos_colors_stea 0 All transpose .jpg')
Heatmap of 'Indirect' Effects PFAS vs. Steroids with All Subjects

Heatmap of ‘Indirect’ Effects PFAS vs. Steroids with All Subjects

knitr::include_graphics('Square Correlation Plot of_korkeatACME PFAS vs. steroids_ for the hypos_colors_stea 1 All transpose .jpg')
Heatmap of 'Direct' Effects PFAS vs. Lipids and Bile Acids with All Subjects

Heatmap of ‘Direct’ Effects PFAS vs. Lipids and Bile Acids with All Subjects

knitr::include_graphics('Heatmap of high ACME tikka111124 idir .jpg')
Heatmap of 'Indirect' Effects Steroids vs. Bile Acids and Lipids with All Subjects

Heatmap of ‘Indirect’ Effects Steroids vs. Bile Acids and Lipids with All Subjects

Making Sankey Diagrams

# To do these diagrams, you need to have a reduced dataset as well as a function that accounts for the group (male/female)
reduced2=function(u3, Group, name, lkm, d) {
  c1=c() # u3=all_all1
  ACMEMedian=c(); ACMEpval=c(); ACMEVar=c()
  ADEMedian=c(); ADEpval=c(); ADEVar=c()
  c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
  ACMEMedian=0 # median(c1[,'ACME'][c1[,'ACME']>0])
  # c1=c1[order(c1[,'ACME']),];  # for 'negative' acmes
  c1=c1[rev(order(c1[,'ACME'])),];  
  # c1=c1[c1[,'ACME']<ACMEMedian & ((c1[,'ADE']-c1[,'ACME']) > 0), ] # c1=c1[c1[,'d0.p']<0.1, ]
  c1=c1[c1[,'ACME'] > ACMEMedian & ((c1[,'ACME']-c1[,'ADE']) > 0), ] # c1=c1[c1[,'d0.p']<0.1, ] 
  
  c1=tryCatch({c1[1:lkm,]}, error = function(msg){return(c1)})
  
  write.xlsx(c1, file = paste(name, Group, date, '.xlsx'), append = FALSE, row.names = TRUE)
  
  return(c1)
}

plottings_sf=function(uh7ma, date, sick, Group, d) {
  # uh7ma = na.omit(c1)
  rt2=uh7ma #[,1:17]# rtot=rtot[,1:17]# rtot=data.frame(rtot) # name=paste(simss,'basic hypothesis',take)
  # https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html
  # https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model
  # https://github.com/MarioniLab/miloR
  # https://www.nature.com/articles/s41467-023-40458-9/figures/4
  name=paste('Contaminants_Steroids_BAs_or_Lipids_sims', date) # rtot=rtot_2000_mrct # rtot=uh5
  
  hoi=c(); 
  if (d=='t') {
    hoi=scan(text=rt2[,1], what=" ")
  } else {
    hoi=scan(text=rownames(rt2), what=" ")
  } # rownames(rt2)# names(rt2[,1]) rownames(rt2
  # hoi=scan(text=rownames(rt2), what=" ")# rownames(rt2)# names(rt2[,1]) rownames(rt2
  hoi=as.data.frame(matrix(hoi, ncol = 3,  byrow = TRUE), stringsAsFactors = FALSE) # Check this number (ncol) 3/4
  # hoi=cbind(hoi[,1:3],rt2[,2])
  hoi=hoi[,1:3]
  # colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Weight')# ,'Gender') ##
  colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids')
  # ,'Gender') ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
  # https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
  # https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
  
  hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH.P4']='17a-OHP4'
  hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
  hoi[,'Steroids' ]  <- gsub("\\.", "-",  hoi[,'Steroids' ] ) #:)
  hoi[,'Steroids' ][ hoi[,'Steroids' ]=='T-Epi-T']='T/Epi-T'
  # df2 <- hoi %>%make_long('Contaminants','Steroids','Bile Acids or Lipids','Weight') # see the sankey test file 17.10.24 for this...
  
  df2 <- hoi %>%make_long('Contaminants','Steroids','Bile Acids or Lipids') 
  
  # In case you need to print to computer: meda='Sankey plot of ', pdf(paste(meda, name, sick, Group, ".pdf"), width = 20, height = 20,  pointsize = 18);
  # print(ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node, fill = factor(node), label = node)) +
  # geom_sankey(flow.alpha = 0.5, node.color = 1) + geom_sankey_label(size = 5.5, color = 1, fill = "white") +
  # scale_fill_viridis_d() + theme_sankey(base_size = 30) + theme(legend.position = "none") + theme(axis.title.x = element_blank())); dev.off()
  
  windowsFonts(A = windowsFont("Calibri (Body)")) 

  p=ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node, fill = factor(node), label = node)) +
            geom_sankey(flow.alpha = 1, node.color = 1) +
            # geom_sankey_label(size = 4.0, color = 1, fill = "white") + #
            geom_sankey_label(size = 8.0, color = 1, fill = "white") +
            scale_fill_viridis_d(option = "H", alpha = 0.75) +
            # scale_fill_viridis_c(option = "turbo") +
            # theme_sankey(base_size = 23) + #
            theme_sankey(base_size = 28) + theme(legend.position = "none") +
            # scale_fill_grey(start = 0.5, end = 0.5) +
            theme(axis.text.x = element_text(hjust = 0.5, vjust=7, colour = 'black') ) + # https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
            theme(axis.title.x = element_blank())

  # Nicer colors than grey:
  # p=ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node, fill = factor(node), label = node)) +
  # geom_sankey(flow.alpha = 0.5, node.color = 1) + geom_sankey_label(size = 3.5, color = 1, fill = "white") +
  # scale_fill_viridis_d() + theme_sankey(base_size = 16) + theme(legend.position = "none") + theme(axis.title.x = element_blank());

  library(ragg)
  # Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
  # https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing

  path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/" # oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
  pngfile <- fs::path(path, paste0(Group, 'e', d, ".png")) # fs::path(knitr::fig_path(),  "theming2.png")
  # agg_png(pngfile, width = 30, height = 40, units = "cm", res = 300, scaling = 2) #
  agg_png(pngfile, width = 60, height = 80, units = "cm", res = 600, scaling = 2)
  plot(p)
  invisible(dev.off())
  knitr::include_graphics(pngfile)
  eoh=paste0(Group, 'e', d, ".png"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh, '.pdf'))
  my_image <- image_read(eoh); my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh, ".svg"))
}

library(readxl)
# setwd("C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/") #check this if needed...
all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic All tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/hypo_basic/100 hypo_b_no_not sick All tikka221024 .xlsx") # #_2 :)
all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
all_Female=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic Female tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
all_Male=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic Male tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);

# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/")
sick='all samples';d='t'
lkm=30;Group='All'; name='just alala';date=paste0(date,'_allds')#dim(all_all)[1];
alma=reduced2(all_all1,Group,name,lkm);
alma = na.omit(alma)
plottings_sf(alma,date,sick,Group,d)

date=paste0(date,'_femalads');name='just alala';Group='female';
almaf=reduced2(all_Female,Group,name,lkm); #all_Female
almaf = na.omit(almaf)
plottings_sf(almaf,date,sick,Group,d)

date=paste0(date,'_malads');name='just alaal';Group='male';
almam=reduced2(all_Male,Group,name,lkm);
almam = na.omit(almam) #https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
plottings_sf(almam,date,sick,Group,d)


# Alternative way to plot Sankeys with the ACMEs:
# Create data which can be used for Sankey
set.seed(111) # Set seed for reproducibility
theme_set(theme_light()) # Set the theme for the plots

# Trying to imitate the approach below:
u3=all_all1; d='t' # Assign dataset and set 'd' to 't'
c1=c() # Initialize c1

# Initialize ACME variables
ACMEMedian=c(); ACMEpval=c(); ACMEVar=c()

# Initialize ADE variables
ADEMedian=c(); ADEpval=c(); ADEVar=c()

c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
# ACMEMedian=0#median(c1[,'ACME'][c1[,'ACME']>0])
c1=c1[rev(order(c1[,'ACME'])),];  # Order c1 by ACME in descending order
c1=c1[((c1[,'ACME']-c1[,'ADE']) > 0), ] # Filter rows where ACME > ADE
c1=c1[c1[,'d0.p']<0.05, ] # Filter rows where p-value < 0.05
# c1=tryCatch({c1[1:lkm,]}, error = function(msg){return(c1)})
uh7ma = na.omit(c1) # Remove rows with NA values
rt2=uh7ma #[,1:17]# rtot=rtot[,1:17]# rtot=data.frame(rtot) # name=paste(simss,'basic hypothesis',take)
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html
# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model
# https://github.com/MarioniLab/miloR
# https://www.nature.com/articles/s41467-023-40458-9/figures/4
name=paste('Contaminants_Steroids_BAs_or_Lipids_sims',date) # Create a name for the plot

hoi=c(); 
if (d=='t') {
  hoi=scan(text=rt2[,1] , what=" ") # Scan the first column of rt2
} else {
  hoi=scan(text=rownames(rt2) , what=" ") # Scan the row names of rt2
} # rownames(rt2)# names(rt2[,1]) rownames(rt2
# hoi=scan(text=rownames(rt2) , what=" ")# rownames(rt2)# names(rt2[,1]) rownames(rt2
hoi=as.data.frame(matrix(hoi, ncol = 3,  byrow = TRUE), stringsAsFactors = FALSE) # Convert hoi to a data frame with 3 columns
hoi=cbind(hoi[,1:3],rt2[,2]) # Combine hoi with the second column of rt2
colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Weight') # Set column names
# ,'Gender') ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot

# Replace specific values in the 'Steroids' column
hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH.P4']='17a-OHP4'
hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
hoi[,'Steroids' ]  <- gsub("\\.", "-",  hoi[,'Steroids' ] ) #:)
hoi[,'Steroids' ][ hoi[,'Steroids' ]=='T-Epi-T']='T/Epi-T'
  
# Create a long format data frame for Sankey plot
df22 <-
  hoi |>
  subset(Weight > -1.05) |>
  pivot_stages_longer(c("Contaminants", "Steroids", "Bile Acids or Lipids"), "Weight", "Bile Acids or Lipids")

# Set the position for the Sankey plot
pos <- position_sankey(v_space = "auto", order = "ascending", align = "justify")

# Create the Sankey plot
p <-
  ggplot(data = df22, mapping = aes(x = stage, y = Weight, group = node,
                  edge_id = edge_id, connector = connector)) + 
  theme_sankey(base_size = 28) + # Set the theme for the Sankey plot
  # theme(legend.position = "none") +
  # scale_fill_grey(start = 0.5, end = 0.5) +
  theme(axis.text.x = element_text(hjust = 0.5, vjust=7, colour = 'black') ) + # Customize axis text
  theme(axis.title.x = element_blank()) # Remove x-axis title

# Update the position for the Sankey plot
pos <- position_sankey(v_space = "auto", order = "descending")

# Add layers to the Sankey plot
p + geom_sankeyedge(aes(fill = Weight), position = pos) +
  geom_sankeynode(position = pos, fill = "#dfe0e6") +
  geom_text(aes(label = node), stat = "sankeynode", position = pos, cex = 2) +
  scale_fill_viridis_c(option = "turbo") # Set the color scale

# In case you need something else from somewhere else...
# setwd("C:/Users/patati/Desktop/Turku/R/hypo4/HOMAIR/perus_ok")
# all_all=read_xlsx(path = "100 hypo4_yes_sick All tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
# all_Female=read_xlsx(path = "100 hypo4_yes_sick Female tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
# all_Male=read_xlsx(path = "100 hypo4_yes_sick Male tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);

# path="C:/Users/patati/Desktop/Turku/R/basic_cova/All"
# # setwd("C:/Users/patati/Desktop/Turku/R/basic_cova/All")
# files=list.files(path=path, pattern=".RData", all.files=TRUE, full.names=TRUE) #https://www.geeksforgeeks.org/read-all-files-in-directory-using-r/
# list_of_files <- list() 
# for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
# names(list_of_files) <- str_remove(list.files(path=path, pattern=".RData", all.files=TRUE, full.names=FALSE),'.RData')  #https://stringr.tidyverse.org/reference/str_remove.html
# 
# # names(list_of_files)[1:3]
# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/")
# all_all1=list_of_files[[1]] #all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
# all_Female=list_of_files[[2]] #all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
# all_Male=list_of_files[[3]] #all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);
# 
# d='nt';lkm=30;Group='All'; name='just all'
# alma=reduced2(all_all1,Group,name,lkm);alma = na.omit(alma)
# plottings_sf(alma,date,sick,Group,d)
# 
# name='just all';Group='female';
# almaf=reduced2(all_Female,Group,name,lkm); almaf = na.omit(almaf)
# plottings_sf(almaf,date,sick,Group,d)
# 
# name='just all';Group='male';
# almam=reduced2(all_Male,Group,name,lkm); almam = na.omit(almam) 
# https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
# plottings_sf(almam,date,sick,Group,d)

# Copiloting helped with some of the comments. (But not with the streamlining.)

Other Handy Functions

# Demographics:
# This is for nonscaled (or not-logged or the like 'raw') data only!

# Demographics Function
Demographics <- function(tv_all, Clini, Group) {
  # Determine the condition based on the group
  cond <- switch(Group,
                 'female' = tv_all[,'SEX.1F.2M'] == min(tv_all[,'SEX.1F.2M']),
                 'male' = tv_all[,'SEX.1F.2M'] == max(tv_all[,'SEX.1F.2M']),
                 'All' = rep(TRUE, nrow(tv_all)))
  
  # Filter the data based on the condition
  tv_red <- tv_all[cond, ]
  Clini2 <- Clini[rownames(tv_red), ]
  
  # Calculate demographics
  N <- nrow(tv_red)
  AGE <- median(tv_red[,'AGE'])
  AGEq1 <- quantile(tv_red[,'AGE'], 0.25)
  AGEq3 <- quantile(tv_red[,'AGE'], 0.75)
  BMI <- median(tv_red[,'BMI'])
  BMIq1 <- quantile(tv_red[,'BMI'], 0.25)
  BMIq3 <- quantile(tv_red[,'BMI'], 0.75)
  PFAS <- median(tv_red[,'PFAS'])
  PFASq1 <- quantile(tv_red[,'PFAS'], 0.25)
  PFASq3 <- quantile(tv_red[,'PFAS'], 0.75)
  HDL <- median(Clini2[,'HDL'])
  HDLq1 <- quantile(Clini2[,'HDL'], 0.25)
  HDLq3 <- quantile(Clini2[,'HDL'], 0.75)
  LDL <- median(as.numeric(Clini2[,'LDL']))
  LDLq1 <- quantile(as.numeric(Clini2[,'LDL']), 0.25)
  LDLq3 <- quantile(as.numeric(Clini2[,'LDL']), 0.75)
  SGmin <- min(tv_red[,'Steatosis.Grade.0.To.3'])
  SGmax <- max(tv_red[,'Steatosis.Grade.0.To.3'])
  FSmin <- min(tv_red[,'Fibrosis.Stage.0.to.4'])
  FSmax <- max(tv_red[,'Fibrosis.Stage.0.to.4'])
  NFmin <- min(tv_red[,'Necroinflammation'])
  NFmax <- max(tv_red[,'Necroinflammation'])
  HImin <- min(tv_red[,'HOMA-IR'])
  HImax <- max(tv_red[,'HOMA-IR'])
  HI <- median(tv_red[,'HOMA-IR'])
  HIq1 <- quantile(tv_red[,'HOMA-IR'], 0.25)
  HIq3 <- quantile(tv_red[,'HOMA-IR'], 0.75)
  
  return(c(N, AGE, AGEq1, AGEq3, BMI, BMIq1, BMIq3, PFAS, PFASq1, PFASq3, HDL, HDLq1, HDLq3, LDL, LDLq1, LDLq3,
           SGmin, SGmax, FSmin, FSmax, NFmin, NFmax, HImin, HImax, HI, HIq1, HIq3))
}

# Calculate demographics for each group
Group <- 'All'; d_all <- Demographics(tv, Clini, Group)
Group <- 'female'; d_female <- Demographics(tv, Clini, Group)
Group <- 'male'; d_male <- Demographics(tv, Clini, Group)

# Combine results into a matrix
d_totaali <- t(rbind(d_all, d_female, d_male))
rownames(d_totaali) <- c('N', 'AGE', 'AGEq1', 'AGEq3', 'BMI', 'BMIq1', 'BMIq3', 'PFAS', 'PFASq1', 'PFASq3', 'HDL', 'HDLq1', 'HDLq3', 'LDL', 'LDLq1', 'LDLq3',
                         'SGmin', 'SGmax', 'FSmin', 'FSmax', 'NFmin', 'NFmax', 'HImin', 'HImax', 'HI', 'HIq1', 'HIq3')
d_totaali
##              d_all   d_female     d_male
## N      104.0000000 69.0000000 35.0000000
## AGE     49.0000000 46.0000000 52.0000000
## AGEq1   43.0000000 43.0000000 44.5000000
## AGEq3   54.2500000 53.0000000 58.0000000
## BMI     45.2960308 45.1082579 46.2603878
## BMIq1   41.0973732 41.1066653 41.3139309
## BMIq3   49.0881549 48.7197232 49.6020327
## PFAS     0.4114301  0.3542222  0.4797220
## PFASq1   0.2846164  0.2517962  0.4114301
## PFASq3   0.5679388  0.5326427  0.7002281
## HDL      1.1000000  1.1300000  1.0100000
## HDLq1    0.9575000  0.9700000  0.8700000
## HDLq3    1.2825000  1.3600000  1.1650000
## LDL      2.3000000  2.5000000  1.9000000
## LDLq1    1.6000000  1.9000000  1.3500000
## LDLq3    2.9000000  3.0000000  2.4000000
## SGmin    0.0000000  0.0000000  0.0000000
## SGmax    3.0000000  3.0000000  1.0000000
## FSmin    0.0000000  0.0000000  0.0000000
## FSmax    4.0000000  3.0000000  4.0000000
## NFmin    0.0000000  0.0000000  0.0000000
## NFmax    2.0000000  1.0000000  2.0000000
## HImin    0.2426667  0.7822222  0.2426667
## HImax   14.5244444 14.5244444 10.7555556
## HI       3.1257778  3.0417778  3.9200000
## HIq1     1.7556667  1.6333333  2.2644444
## HIq3     4.5747778  3.8666667  5.4240000
# Sample Size Functions
# Function to determine sample sizes for binary outcomes
sample_size <- function(NAFLD, Outcome, Group) { 
  NAFLDo <- switch(Group,
                   'Male' = NAFLD[NAFLD[,'SEX.1F.2M'] == 2, ],
                   'Female' = NAFLD[NAFLD[,'SEX.1F.2M'] == 1, ],
                   'All' = NAFLD)
  
  n0 <- nrow(NAFLDo[NAFLDo[, Outcome] == 0, ])
  n1 <- nrow(NAFLDo[NAFLDo[, Outcome] > 0, ])
  
  return(c(n0, n1))
}

# Function to determine sample sizes for continuous outcomes
sample_size2 <- function(NAFLD, Outcome, Group, sick_groupe) { 
  sample_data <- c()
  for (i in 1:2) {
    NAFLDo <- switch(Group,
                     'Male' = NAFLD[NAFLD[,'SEX.1F.2M'] == 2 & (i == 1 & !sick_groupe | i == 2 & sick_groupe), ],
                     'Female' = NAFLD[NAFLD[,'SEX.1F.2M'] == 1 & (i == 1 & !sick_groupe | i == 2 & sick_groupe), ],
                     'All' = NAFLD[(i == 1 & !sick_groupe | i == 2 & sick_groupe), ])
    sample_data <- c(sample_data, nrow(NAFLDo))
  }
  return(sample_data)
}

# Prepare NAFLD data
NAFLD <- tv[, 1:28]
NAFLD[NAFLD[, 5] > 0, 5] <- 1
NAFLD[NAFLD[, 6] > 0, 6] <- 1
NAFLD[NAFLD[, 7] > 0, 7] <- 1
NAFLD[NAFLD[, 8] <= 1.5, 8] <- 0
NAFLD[NAFLD[, 8] > 1.5, 8] <- 1
colnames(NAFLD) <- gsub("-", ".", colnames(NAFLD))
colnames(NAFLD) <- gsub("/", ".", colnames(NAFLD))
colnames(NAFLD) <- gsub("11", "X11", colnames(NAFLD))
colnames(NAFLD) <- gsub("17", "X17", colnames(NAFLD))
colnames(NAFLD) <- gsub("#", ".", colnames(NAFLD))
colnames(NAFLD)[colnames(NAFLD) == 'X17aOH.P4'] <- 'X17.aOHP4'

# Calculate sample sizes for each outcome and group
jappend <- c()
outcomes <- c('Steatosis.Grade.0.To.3', 'Fibrosis.Stage.0.to.4', 'Necroinflammation', 'HOMA.IR')
groups <- c('All', 'Female', 'Male')

for (Outcome in outcomes) {
  for (Group in groups) {
    jappend <- c(jappend, sample_size(NAFLD, Outcome, Group))
  }
}

matrix(unlist(jappend), ncol = 8)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,]   83   11   62   26   88    9   18   56
## [2,]   21   25   42   19   16   28   86    5
## [3,]   58   10   43   16   60    7   13   30
# Mean and Quantile Calculation Function
mean_q1q3 <- function(Group) {
  cond <- switch(Group,
                 'Female' = tv_all[,'Gender'] == min(tv_all[,'Gender']),
                 'Male' = tv_all[,'Gender'] == max(tv_all[,'Gender']),
                 'All' = rep(TRUE, nrow(tv_all)))
  
  tv_red <- tv[cond, ]
  M <- tv_red[, Mediator]
  
  cM <- round(apply(M, 2, median, na.rm = TRUE), 0)
  quants <- c(0.25, 0.75)
  csd <- round(apply(M, 2, quantile, probs = quants, na.rm = TRUE), 0)
  
  tot <- rbind(cM, csd)
  return(tot)
}

totQ <- mean_q1q3('All')
femQ <- mean_q1q3('Female')
menQ <- mean_q1q3('Male')
tot3 <- cbind(t(totQ), t(femQ), t(menQ))
print(tot3)
##             cM   25%   75%    cM   25%   75%    cM   25%   75%
## E        40685 33628 45755 40538 30857 45250 42424 37906 49608
## 11-KT      926   617  1161   925   606  1163   942   759  1087
## 17a-OHP5  1613   924  3290  1615   776  3293  1612  1190  3050
## S          843   560  1134   810   533  1050   892   576  1346
## B         7258  5046 12538  7545  5054 12526  6495  5085 12852
## 11b-OHA4  8870  6700 11868  9461  6379 11406  8663  7510 12528
## 11-KDHT    100   100   100   100   100   100   100   100   153
## 11-KA4    1262   845  1687  1262   838  1663  1318   890  1699
## DHEA      8294  5602 12330  9395  6072 13100  6683  5301  9524
## T/Epi-T   1225   838  8089   913   717  1216 10808  8204 13367
## 17a-OHP4  1121   631  1717   830   527  1510  1510  1076  1839
## AN         250   250   507   250   250   505   250   250   410
## DHT        276   132   461   203   109   311   524   381   632
## A4        2226  1548  3064  2366  1578  3276  1995  1539  2424
## DOC         62    47    90    60    48    96    67    44    87
## P5        2091  1288  4116  2375  1370  4172  1584  1120  2464
## P4         124    79   262   160    92   586    94    75   146
## A         1105   685  1827  1291   925  2004   722   443  1142
## E2         137    77   271   167    66   441   117   104   175
## E1         271   170   407   321   193   482   232   166   285
#Above goes, but not below...

# I wanted to check the steroids with highest ACMEs between healthy (all, all) and sick (all, all).
# I needed to have the cutoffs and ways to see the overlaps. For that, I developed a function called 'the_combos'
#Some data is needed... :)
path="C:/Users/patati/Desktop/Turku/R/hypo_basic/Tiedostot/"; setwd(path)
files <- list.files(pattern="*.RData")
ldf <- lapply(files, load)
list_of_files <- list() #create empty list
# Loop through the files
for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
names(list_of_files) <- files #https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
library(stringr)

the_combos=function(list_of_files,Group,cond) { #cond='' vastaa kaikkia
#General categories of females or males (without 'All, 'MASLD, and 'Menopause')
u <- names(list_of_files)
a <- Group #note the writing, yes with " " in between. " female"  or " male" or " all"
ie=grep(a,u,fixed=TRUE); u2=u[ie]
del=c(grep("All",u2,fixed=TRUE),grep("MASLD",u2,fixed=TRUE),grep("Menopause",u2,fixed=TRUE))
yl=1:length(u2); lop=yl[!yl %in% del]
u=u2[lop] #general male
ie=grep(cond,u,fixed=TRUE); u=u[ie]
a <- "sick"; ie=grep(a,u,fixed=TRUE); u3=u[ie]# sick ones, male or females
u_sick=list_of_files[u3]#https://www.tutorialspoint.com/how-to-extract-strings-that-contains-a-particular-substring-in-an-r-vector
a='healthy';ie=grep(a,u,fixed=TRUE);u4=u[ie]
u_healthy=list_of_files[u4] # u_healthy=list_of_files[grep(a,u,fixed=TRUE)] 
tcross=function(u_sick) { 
  all_names = c(); i=1
  for (i in (1:length(u_sick))) { #length(u_sick) is 18
    us=u_sick[[i]]
    aux=c();
    if (dim(us)[1]>200) {aux=200} else {aux=dim(us)[1]}
    # plot(1:aux,as.numeric(us[1:aux,1]))
    values=(1:(length(as.numeric(us[,1]))-1))
    coo=c(); z=0
    for (z in values) {coo=append(coo,abs(us[z,1]-us[(z+1),1]))}  
    pss=which(coo>quantile(coo,0.95));ro=round(length(coo)/3)# pss[pss<ro]#round(length(which(coo>quantile(coo,0.95)))/2)]
    dpp=diff(pss[pss<ro]) #https://stackoverflow.com/questions/13602170/how-do-i-find-the-difference-between-two-values-without-knowing-which-is-larger
    dpp_sort=sort(dpp,decreasing = TRUE)
    if (length(dpp_sort)<5) { for_comp=length(dpp)+1} else {
      if (sum(dpp_sort[1:5]>5)==5) {cf=dpp_sort[5];cff=which(dpp>cf)[1]} else {cf=dpp_sort[2];cff=which(dpp>cf)[1]}
      cff=which(dpp>(cf-1))[1]
      for_comp=pss[cff]; }
    
    if (aux<30) {for_comp  =max(which(as.vector(us[,1]>0)))}
    if (aux>30) {if (max(pss[pss<ro])<30) (for_comp=30)} #due the small amount of good ones that can be like 4...
    
    rt2=us[1:for_comp,]; j=0
    hoi=c();for (j in 1:dim(rt2)[1]) {hoi=append(hoi,scan(text=rownames(rt2)[j], what=""))}
    hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
    hoi[,2] <- gsub("\\.", "-",  hoi[,2] ) #:)
    xz=round(quantile(table(hoi[,2]),0.25)); if (xz<2) {xz=0} else {xz=xz} #let's start with 25% and if not ok go to like 5%
    names=c();names=names(table(hoi[,2])[table(hoi[,2])>xz]) 
    # print(all_names)
    all_names=append(names,all_names)
  }
  return(sort(table(all_names),decreasing = TRUE))  
}
tc_sick=tcross(u_sick);tc_healthy=tcross(u_healthy) # table(the_cross)# hist(table(the_cross),breaks=10)
cae1=as.numeric(names(sort(table(tc_sick),decreasing = TRUE)))[1];cae2=as.numeric(names(sort(table(tc_sick),decreasing = TRUE)))[2]
if (max(tc_sick)==cae1 | max(tc_sick)==cae2)
cfn=cae2-1 #sometimes as with females no differences, i.. tc_sick;rev(as.numeric(names(table(tc_sick))))[2]-1#
if (is.na(cfn)) {steroid_sick=names(tc_sick)} else {steroid_sick=names(tc_sick[tc_sick>(cfn)])}
cae1=as.numeric(names(sort(table(tc_healthy),decreasing = TRUE)))[1];cae2=as.numeric(names(sort(table(tc_healthy),decreasing = TRUE)))[2]
if (max(tc_healthy)==cae1 | max(tc_healthy)==cae2)
cfn=cae2-1 
if (is.na(cfn)) {steroid_healthy=names(tc_healthy)} else {steroid_healthy=names(tc_healthy[tc_healthy>(cfn)])}
# https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
tbe=intersect(steroid_healthy, steroid_sick)
totaali_sh_all=steroid_sick[!steroid_sick %in% tbe] #https://www.geeksforgeeks.org/difference-between-two-vectors-in-r/ 
return(list(steroid_sick,tbe,totaali_sh_all)) } #"17a-OHP4" "DHT"      "DOC"      'P4'

Group = ' all'; cond='';all_all=the_combos(list_of_files,Group,cond) #ekat allit ('All...') oli deletoitu, yes, ja käytetty vain spesifisiä alleja...
Group = ' female'; cond='';female_all=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='';male_all=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='Steatosis';all_steatosis=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='Steatosis';female_steatosis=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='Steatosis';male_steatosis=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='Fibrosis';all_Fibrosis=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='Fibrosis';female_Fibrosis=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='Fibrosis';male_Fibrosis=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='Necroinflammation';all_Necroinflammation=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='Necroinflammation';female_Necroinflammation=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='Necroinflammation'; male_Necroinflammation=the_combos(list_of_files,Group,cond) 
Group = ' all'; cond='HOMAIR'; all_HOMAIR=the_combos(list_of_files,Group,cond) 
Group = ' female'; cond='HOMAIR'; female_HOMAIR=the_combos(list_of_files,Group,cond) 
Group = ' male'; cond='HOMAIR'; male_HOMAIR=the_combos(list_of_files,Group,cond) 

pottees=c(all_all[3],female_all[3],male_all[3],
            all_steatosis[3],female_steatosis[3],male_steatosis[3],
            all_Fibrosis[3],female_Fibrosis[3],male_Fibrosis[3],
            all_Necroinflammation[3],female_Necroinflammation[3],male_Necroinflammation[3],
            all_HOMAIR[3],female_HOMAIR[3],male_HOMAIR[3])

# In case you need to print the above list. This is good for printing list of list in anyways:
# mylist <- pottees; file <- paste0("myfile_ok",date,".txt"); conn <- file(description=file, open="w")
# newlist <- lapply(seq_len(length(mylist)), function(i){
#   lapply(seq_len(length(mylist[[i]])), function(j) {
#     temp <- c(i, j, mylist[[i]][[j]])
#     writeLines(text=paste(temp, collapse=","), con=conn, sep="\r\n")  
#   }) }); close(conn)

# So this will give the most common steroids in all cases compared to all cases in all the subjects (all, female, male)
table(unlist(pottees))[rev(order(table(unlist(pottees))))]
## 
##      DOC       AN     MUFA       E1 17a-OHP4  11-KDHT   TG_SFA  TCDCA_L 
##       13       13       10        9        8        7        6        6 
##   GHCA_L      Cer       A4   TLCA_L       PI   Hexcer  GCDCA_L       DG 
##        6        6        6        5        5        5        5        5 
## 11b-OHA4       PE   THCA_L      LPC  TDHCA_L   TDCA_L  TbMCA_L       PC 
##        5        4        3        3        2        2        2        2 
##    GCA_L    DCA_L   CDCA_L    11-KT   UDCA_L  GUDCA_L   GLCA_L  GHDGA_L 
##        2        2        2        2        1        1        1        1 
##       CE     C4_L 
##        1        1
# For comparing the PFAS/steroid/BA (or lipid) in healthy and sick mediation:
# Load all the variables in the folder:
setwd("C:/Users/patati/Desktop/Turku/R/hypo4/Tiedostot/") #check this if needed...
files <- list.files(pattern="*.RData")
ldf <- lapply(files, load)
list_of_files <- list() #create empty list
# Loop through the files:
for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
names(list_of_files) <- files #ht
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
# https://stackoverflow.com/questions/38643000/naming-list-elements-in-r

comp_med_two=function(list_of_files) { # I made this a function, since this kind of cross comparison could be handy also in other contexts
  health=c(); sickness=c() # Initialize empty vectors for health and sickness
  
  # Loop through the list of file names and categorize them into health and sickness
  for (i in 1:length(names(list_of_files))) {
    if (str_detect(names(list_of_files)[i],'healthy')) {
      health=append(health, list_of_files[i]) # Append to health if 'healthy' is in the name
    } else if (str_detect(names(list_of_files)[i],'sick')) {
      sickness=append(sickness, list_of_files[i]) # Append to sickness if 'sick' is in the name
    }
  }
  
  health2=c(); sickness2=c() # Initialize empty data frames for health2 and sickness2
  
  # Process health files: order by the first column in descending order and take the top 10 rows
  i=0; for (i in 1:length(c(health))) {
    health[[i]][rev(order(health[[i]][,1])),]
    health2=rbind(health2, health[[i]][1:10,])
  }
  
  # Process sickness files: order by the first column in descending order and take the top 10 rows
  i=0; for (i in 1:length(c(sickness))) {
    sickness[[i]][rev(order(sickness[[i]][,1])),]
    sickness2=rbind(sickness2, sickness[[i]][1:10,])
  }
  
  # Combine row names with the data for health2 and sickness2
  health2=cbind(rownames(health2), health2)
  sickness2=cbind(rownames(sickness2), sickness2)
  
  # Set column names for health2 and sickness2
  colnames(health2)=c('Mediation','ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u')
  colnames(sickness2)=c('Mediation','ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u')
  
  # Replace ".RData" in row names with an empty string
  rownames(health2)=str_replace_all(rep(names(health), each = 10), ".RData", "")
  rownames(sickness2)=str_replace_all(rep(names(sickness), each = 10), ".RData", "")
  # https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rep
  # https://stackoverflow.com/questions/10294284/remove-all-special-characters-from-a-string-in-r
  # https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
  
  # Process health2 data for contaminants, steroids, and bile acids or lipids
  hoi=c(); hoi=scan(text=health2[,1], what=""); hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
  colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Desig.')
  hoi_healthy=hoi[,c('Contaminants','Steroids','Bile Acids or Lipids')]
  
  # Process sickness2 data for contaminants, steroids, and bile acids or lipids
  hoi=c(); hoi=scan(text=sickness2[,1], what=""); hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
  colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Desig.')
  hoi_sick=hoi[,c('Contaminants','Steroids','Bile Acids or Lipids')]
  ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
  
  return(list(hoi_sick, hoi_healthy)) # Return the processed data as a list
}

cmt=comp_med_two(list_of_files);
hoi_sick=cmt[[1]];hoi_healthy=cmt[[2]]

# So the differences between contaminants (in the sick vs. healthy 'mediation') are:
table(hoi_sick[,1])[rev(order(table(hoi_sick[,1])))];table(hoi_healthy[,1])[rev(order(table(hoi_healthy[,1])))]
## 
##           PFOA          PFHpA          PFHxA          PFHxS PFHxA_Branched 
##             37             35             33             29             20 
##           PFNA           PFOS 
##             16             10
## 
##          PFHpA           PFOS          PFHxA           PFNA PFHxA_Branched 
##             81             37             27             15             12 
##          PFHxS 
##              8
# Differences Between Steroids...
# table(hoi_sick[,2])[rev(order(table(hoi_sick[,2])))];table(hoi_healthy[,2])[rev(order(table(hoi_healthy[,2])))]
# Differences Between BAs/Lipids...
# table(hoi_sick[,3])[rev(order(table(hoi_sick[,3])))];table(hoi_healthy[,3])[rev(order(table(hoi_healthy[,3])))]

# Copiloting helped both with the comments and streamlines.

Disclaimer

# This is a part of on-going research work that has not been published yet and I cannot take a responsibility if something above is not working/ok in your system/research/etc. in 100% accuracy. 
# Furthermore, I am inbound to update these sites relatively often so a grain of salt is needed when reading these... :)
# Please note also that most of the plotting functions have bee commented out so that their execution would not take time when compiling as well as due quality reasons: Rmarkdown print is not as good in the screen and in final (html) document as the similar with knitr's 'include_graphics' that shows the full scale image.
# In addition, no sensitive data has been shared and you see only 'examples' here. Moreover, I am not an expert on biology (or even at computing per se) and may not know your specific biochemical and/or mechanical questions that you maybe using this for. 
# Not to mention few, I just want to say that I am just doing the analysis for understanding the data partly on its own right with the context as mentioned above and partly as a starting 'side project'.
# Thanks and sorry for possible inconveniences in advance! :)

About R Setups

# As per this date, see above or below, 
thedate 
## [1] "290125"
# all the above codes work to produce results with the data files given.
# All the packages have been copied to local drive (12.9.24), i.e. the content of '.libPaths()'
# R is 'R version 4.3.1 (2023-06-16 ucrt)' (with 'version'/'getRversion()' command)
# And RStudio is '2024.4.0.735' (with 'RStudio.Version()' command)
# In addition, the following information regarding the setups is given:
sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Finnish_Finland.utf8  LC_CTYPE=Finnish_Finland.utf8   
## [3] LC_MONETARY=Finnish_Finland.utf8 LC_NUMERIC=C                    
## [5] LC_TIME=Finnish_Finland.utf8    
## 
## time zone: Europe/Helsinki
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices datasets  utils    
## [8] methods   base     
## 
## other attached packages:
##   [1] xlsx_0.6.5                  viridis_0.6.5              
##   [3] viridisLite_0.4.2           tufte_0.13                 
##   [5] tint_0.1.4                  superb_0.95.19             
##   [7] sjPlot_2.8.17               scatterplot3d_0.3-44       
##   [9] scater_1.34.0               scuttle_1.16.0             
##  [11] SingleCellExperiment_1.28.1 SummarizedExperiment_1.36.0
##  [13] Biobase_2.66.0              GenomicRanges_1.58.0       
##  [15] GenomeInfoDb_1.42.1         IRanges_2.40.0             
##  [17] S4Vectors_0.44.0            BiocGenerics_0.52.0        
##  [19] MatrixGenerics_1.18.0       matrixStats_1.4.1          
##  [21] scales_1.3.0                rstatix_0.7.2              
##  [23] rmdformats_1.0.4            rmarkdown_2.29             
##  [25] rgl_1.3.14                  reshape2_1.4.4             
##  [27] remotes_2.5.0               readxl_1.4.3               
##  [29] rcompanion_2.4.36           RColorBrewer_1.1-3         
##  [31] ragg_1.3.3                  qpgraph_2.40.0             
##  [33] quantreg_5.99.1             SparseM_1.84-2             
##  [35] psych_2.4.6.26              prettydoc_0.4.1            
##  [37] ppcor_1.1                   plotrix_3.8-4              
##  [39] plyr_1.8.9                  pathviewr_1.1.7            
##  [41] PerformanceAnalytics_2.0.4  xts_0.14.1                 
##  [43] pheatmap_1.0.12             MOFA2_1.16.0               
##  [45] mlma_6.3-1                  gplots_3.2.0               
##  [47] coxme_2.2-22                bdsmatrix_1.3-7            
##  [49] survival_3.7-0              abind_1.4-8                
##  [51] mgcv_1.9-1                  nlme_3.1-166               
##  [53] meta_8.0-1                  metadat_1.2-0              
##  [55] mediation_4.5.0             sandwich_3.1-1             
##  [57] mvtnorm_1.3-2               MASS_7.3-61                
##  [59] mdatools_0.14.2             Maaslin2_1.16.0            
##  [61] magrittr_2.0.3              magick_2.8.5               
##  [63] lsr_0.5.2                   lme4_1.1-35.5              
##  [65] lmtest_0.9-40               zoo_1.8-12                 
##  [67] lavaan_0.6-19               insight_1.0.0              
##  [69] igraph_2.1.1                hrbrthemes_0.8.7           
##  [71] Hmisc_5.2-1                 glmnet_4.1-8               
##  [73] Matrix_1.7-0                ggtext_0.1.2               
##  [75] ggh4x_0.2.8                 ggsankeyfier_0.1.8         
##  [77] ggsankey_0.0.99999          ggpubr_0.6.0               
##  [79] ggplotify_0.1.2             ggforestplot_0.1.0         
##  [81] ggforce_0.4.2               ggcorrplot_0.1.4.1         
##  [83] FSA_0.9.5                   fs_1.6.5                   
##  [85] extrafont_0.19              effsize_0.8.1              
##  [87] dmetar_0.1.0                datarium_0.1.0             
##  [89] daiR_1.0.1                  corrplot_0.95              
##  [91] correlation_0.8.6           ComplexHeatmap_2.22.0      
##  [93] circlize_0.4.16             censReg_0.5-38             
##  [95] maxLik_1.5-2.1              miscTools_0.6-28           
##  [97] car_3.1-3                   carData_3.0-5              
##  [99] brickster_0.2.5             binilib_0.2.0              
## [101] lubridate_1.9.3             forcats_1.0.0              
## [103] stringr_1.5.1               dplyr_1.1.4                
## [105] purrr_1.0.2                 readr_2.1.5                
## [107] tidyr_1.3.1                 tibble_3.2.1               
## [109] ggplot2_3.5.1               tidyverse_2.0.0            
## [111] bigsnpr_1.12.18             bigstatsr_1.6.1            
## 
## loaded via a namespace (and not attached):
##   [1] graph_1.84.0              Formula_1.2-5            
##   [3] zlibbioc_1.52.0           fpc_2.2-13               
##   [5] tidyselect_1.2.1          bit_4.5.0.1              
##   [7] doParallel_1.0.17         clue_0.3-66              
##   [9] lattice_0.22-6            rjson_0.2.23             
##  [11] blob_1.2.4                rngtools_1.5.2           
##  [13] S4Arrays_1.6.0            parallel_4.4.1           
##  [15] png_0.1-8                 cli_3.6.3                
##  [17] bayestestR_0.15.0         sjstats_0.19.0           
##  [19] askpass_1.2.1             openssl_2.2.2            
##  [21] gargle_1.5.2              textshaping_0.4.0        
##  [23] BiocIO_1.16.0             kernlab_0.9-33           
##  [25] BiocNeighbors_2.0.1       basilisk.utils_1.18.0    
##  [27] uwot_0.2.2                curl_6.0.1               
##  [29] mime_0.12                 evaluate_1.0.1           
##  [31] coin_1.4-3                stringi_1.8.4            
##  [33] backports_1.5.0           Exact_3.3                
##  [35] XML_3.99-0.17             httpuv_1.6.15            
##  [37] flexmix_2.3-19            AnnotationDbi_1.68.0     
##  [39] rappdirs_0.3.3            splines_4.4.1            
##  [41] nortest_1.0-4             mclust_6.1.1             
##  [43] getopt_1.20.4             jpeg_0.1-10              
##  [45] doRNG_1.8.6               pcaPP_2.0-5              
##  [47] glmmML_1.1.7              ggbeeswarm_0.7.2         
##  [49] rootSolve_1.8.2.4         lpSolve_5.6.22           
##  [51] arrow_17.0.0.1            DBI_1.2.3                
##  [53] HDF5Array_1.34.0          jquerylib_0.1.4          
##  [55] bigparallelr_0.3.2        withr_3.0.2              
##  [57] class_7.3-22              systemfonts_1.1.0        
##  [59] vwline_0.2-4              rtracklayer_1.66.0       
##  [61] htmlwidgets_1.6.4         flock_0.7                
##  [63] ggrepel_0.9.6             labeling_0.4.3           
##  [65] SparseArray_1.6.0         cellranger_1.1.0         
##  [67] DESeq2_1.46.0             DEoptimR_1.1-3-1         
##  [69] diptest_0.77-1            lmom_3.2                 
##  [71] annotate_1.84.0           reticulate_1.40.0        
##  [73] XVector_0.46.0            knitr_1.49               
##  [75] UCSC.utils_1.2.0          timechange_0.3.0         
##  [77] pbivnorm_0.6.0            foreach_1.5.2            
##  [79] fansi_1.0.6               caTools_1.18.3           
##  [81] qtl_1.70                  data.table_1.16.2        
##  [83] biglm_0.9-3               rhdf5_2.50.0             
##  [85] irlba_2.3.5.1             gridBezier_1.1-1         
##  [87] extrafontdb_1.0           DescTools_0.99.58        
##  [89] gridGraphics_0.5-1        lazyeval_0.2.2           
##  [91] yaml_2.3.10               crayon_1.5.3             
##  [93] later_1.4.1               tweenr_2.0.3             
##  [95] bigassertr_0.1.6          Rgraphviz_2.50.0         
##  [97] codetools_0.2-20          base64enc_0.1-3          
##  [99] GlobalOptions_0.1.2       sjlabelled_1.2.0         
## [101] KEGGREST_1.46.0           Rtsne_0.17               
## [103] shape_1.4.6.1             shinyBS_0.61.1           
## [105] Rsamtools_2.22.0          gdtools_0.4.1            
## [107] filelock_1.0.3            foreign_0.8-87           
## [109] pkgconfig_2.0.3           xml2_1.3.6               
## [111] mathjaxr_1.6-0            GenomicAlignments_1.42.0 
## [113] performance_0.12.4        xtable_1.8-4             
## [115] httr_1.4.7                rbibutils_2.3            
## [117] tools_4.4.1               prabclus_2.3-4           
## [119] beeswarm_0.4.0            htmlTable_2.4.3          
## [121] broom_1.0.7               checkmate_2.3.2          
## [123] rrapply_1.2.7             googleAuthR_2.0.2        
## [125] ggeffects_2.0.0           MatrixModels_0.5-3       
## [127] assertthat_0.2.1          digest_0.6.37            
## [129] optparse_1.7.5            numDeriv_2016.8-1.1      
## [131] bookdown_0.41             dir.expiry_1.14.0        
## [133] farver_2.1.2              tzdb_0.4.0               
## [135] yulab.utils_0.1.8         rpart_4.1.23             
## [137] glue_1.8.0                cachem_1.1.0             
## [139] polyclip_1.10-7           generics_0.1.3           
## [141] Biostrings_2.74.0         CompQuadForm_1.4.3       
## [143] mnormt_2.1.1              ScaledMatrix_1.14.0      
## [145] fontBitstreamVera_0.1.1   minqa_1.2.8              
## [147] httr2_1.0.7               ggstance_0.3.7           
## [149] utf8_1.2.4                sjmisc_2.8.10            
## [151] basilisk_1.18.0           gtools_3.9.5             
## [153] datawizard_0.13.0         ggsignif_0.6.4           
## [155] shiny_1.9.1               gridExtra_2.3            
## [157] GenomeInfoDbData_1.2.13   rhdf5filters_1.18.0      
## [159] RCurl_1.98-1.16           memoise_2.0.1            
## [161] gld_2.6.6                 fontLiberation_0.1.0     
## [163] renv_1.0.11               rmio_0.4.0               
## [165] netmeta_2.9-0             rstudioapi_0.17.1        
## [167] cluster_2.1.6             magic_1.6-1              
## [169] hms_1.1.3                 munsell_0.5.1            
## [171] cowplot_1.1.3             colorspace_2.1-1         
## [173] poibin_1.6                rlang_1.1.4              
## [175] quadprog_1.5-8            collapse_2.0.18          
## [177] xfun_0.49                 multcompView_0.1-10      
## [179] TH.data_1.1-2             e1071_1.7-16             
## [181] metafor_4.6-0             iterators_1.0.14         
## [183] modeltools_0.2-23         libcoin_1.0-10           
## [185] rJava_1.0-11              Rhdf5lib_1.28.0          
## [187] bigsparser_0.7.3          promises_1.3.2           
## [189] bitops_1.0-9              Rdpack_2.6.2             
## [191] RSQLite_2.3.9             proxy_0.4-27             
## [193] fgsea_1.32.0              DelayedArray_0.32.0      
## [195] compiler_4.4.1            beachmat_2.22.0          
## [197] boot_1.3-31               Rcpp_1.0.13-1            
## [199] Rttf2pt1_1.3.12           BiocSingular_1.22.0      
## [201] fontquiver_0.2.1          googleCloudStorageR_0.7.0
## [203] BiocParallel_1.40.0       gridtext_0.1.5           
## [205] R6_2.5.1                  multcomp_1.4-26          
## [207] fastmap_1.2.0             fastmatch_1.1-4          
## [209] vipor_0.4.7               rsvd_1.0.5               
## [211] nnet_7.3-19               gtable_0.3.6             
## [213] MuMIn_1.48.4              KernSmooth_2.23-24       
## [215] htmltools_0.5.8.1         bit64_4.5.2              
## [217] lifecycle_1.0.4           zip_2.3.1                
## [219] nloptr_2.1.1              restfulr_0.0.15          
## [221] xlsxjars_0.6.1            sass_0.4.9               
## [223] vctrs_0.6.5               plm_2.6-4                
## [225] robustbase_0.99-4-1       haven_2.5.4              
## [227] bslib_0.8.0               pillar_1.9.0             
## [229] GenomicFeatures_1.58.0    locfit_1.5-9.10          
## [231] expm_1.0-0                jsonlite_1.8.9           
## [233] GetoptLong_1.0.5

References

# Here are the reference links unceremoniously in order of appearance:

# https://stackoverflow.com/questions/43527520/r-markdown-yaml-scanner-error-mapping-values
# https://appsilon.com/imputation-in-r 
# https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
# https://readxl.tidyverse.org/articles/column-names.html
# https://www.analyticsvidhya.com/blog/2021/06/hypothesis-testing-parametric-and-non-parametric-
# https://stackoverflow.com/questions/54264980/r-how-to-set-row-names-attribute-as-numeric-from-character 
# https://stackoverflow.com/questions/13676878/fastest-way-to-get-min-from-every-column-in-a-matrix
# https://www.geeksforgeeks.org/performing-logarithmic-computations-in-r-programming-log-log10-log1p-and-log2-functions/ 
# https://stackoverflow.com/questions/50476717/i-want-to-align-match-two-
# https://mdatools.com/docs/preprocessing--autoscaling.html
# https://svkucheryavski.gitbooks.io/mdatools/content/preprocessing/text.html
# https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
# https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
# https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
# https://r-graph-gallery.com/265-grouped-boxplot-with-ggplot2.html
# https://stackoverflow.com/questions/53724834/why-does-the-plot-size-differ-between-docx-and-html-in-rmarkdownrender
# https://stackoverflow.com/questions/32539222/group-boxplot-data-while-keeping-their-individual-x-axis-labels-in-ggplot2-in-r
# https://stackoverflow.com/questions/34522732/changing-fonts-in-ggplot2
# https://stackoverflow.com/questions/37488075/align-axis-label-on-the-right-with-ggplot2
# http://www.sthda.com/english/wiki/ggplot2-rotate-a-graph-
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
# https://stackoverflow.com/questions/76758153/is-there-a-way-to-change-the-asterisks-to-match-custom-p-values 
# https://datavizpyr.com/horizontal-boxplots-with-ggplot2-in-r/
# https://stackoverflow.com/questions/72564551/a-custom-legend-unrelated-to-data-in-ggplot
# https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
# https://stackoverflow.com/questions/66429500/how-to-make-raggagg-png-device-work-with-ggsave
# https://stats.stackexchange.com/questions/237256/is-the-shapiro-wilk-test-only-applicable-to-smaller-sample-sizes  
# https://statisticsbyjim.com/hypothesis-testing/nonparametric-parametric-tests/
# https://www.statology.org/mean-standard-deviation-grouped-data/  
# https://amsi.org.au/ESA_Senior_Years/SeniorTopic4/4h/4h_2content_11.html  
# https://www.themathdoctors.org/mean-and-standard-deviation-of-grouped-data/
# https://stackoverflow.com/questions/29825537/group-categories-in-r-according-to-first-letters-of-a-string
# https://datatofish.com/create-dataframe-in-r/
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot
# http://www.sthda.com/english/wiki/unpaired-two-samples-wilcoxon-test-in-r
# https://blogs.sas.com/content/iml/2011/04/27/log-transformations-how-to-handle-negative-data-values.html
# https://stats.stackexchange.com/questions/155429/how-to-transform-negative-values-to-logarithms
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1534033/
# https://www.statisticshowto.com/probability-and-statistics/statistics-definitions/parametric-and-non-parametric-data/
# https://www.bmj.com/content/312/7038/1079.full 
# https://stats.stackexchange.com/questions/589920/how-can-i-back-transform-a-log-data-to-interpret-t-test-and-get-original-ci
# https://www.biostars.org/p/16481/
# https://whitlockschluter3e.zoology.ubc.ca/RLabs/R_tutorial_Contingency_analysis.html  
# https://sphweb.bumc.bu.edu/otlt/mph-modules/ep/ep713_randomerror/ep713_randomerror6.html
# https://www.r-bloggers.com/2015/01/easy-error-propagation-in-r/ 
# https://www.biostars.org/p/342756/ 
# https://en.wikipedia.org/wiki/Tukey%27s_range_test 
# https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
# Jukka Vaari 1993 Fysiikan Laboratoriotyöt :)
# https://www.r-bloggers.com/2020/03/how-to-standardize-group-colors-in-data-visualizations-in-r/
# https://stackoverflow.com/questions/71745719/how-to-control-stripe-transparency-using-ggforestplot-geom-stripes
# https://stackoverflow.com/questions/10547487/remove-facet-wrap-labels-completely
# https://stackoverflow.com/questions/24169675/multiple-colors-in-a-facet-strip-background-in-ggplot
# http://www.sthda.com/english/wiki/ggplot2-axis-scales-and-transformations 
# https://www.statology.org/geom_point-fill/
# https://stackoverflow.com/questions/62093084/set-geom-vline-line-types-and-sizes-with-aes-mapping-in-ggplot2
# https://en.wikipedia.org/wiki/Standard_error
# https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
# https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
# https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rep
# https://stackoverflow.com/questions/10294284/remove-all-special-characters-from-a-string-in-r
# https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
# https://jokergoo.github.io/circlize_book/book/advanced-layout.htmlcombine-circular-plots
# https://stackoverflow.com/questions/31943102/rotate-labels-in-a-chorddiagram-r-circlize
# https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html
# https://stats.stackexchange.com/questions/79399/calculate-variance-explained-by-each-predictor-in-multiple-regression-using-r
# https://rdrr.io/github/MRCIEU/TwoSampleMR/man/get_r_from_pn.html
# https://onlinestatbook.com/2/effect_size/variance_explained.html
# https://stackoverflow.com/questions/10441437/why-am-i-getting-x-in-my-column-names-when-reading-a-data-frame
# https://stackoverflow.com/questions/27044727/removing-characters-from-string-in-r
# https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# https://r4ds.had.co.nz/graphics-for-communication.htmlfigure-sizing
# https://bioconductor.org/packages/devel/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html
# https://bioinformatics.stackexchange.com/questions/22414/when-analysing-microarray-data-is-it-need-to-do-normalization-and-standardizati
# https://bioinformatics.stackexchange.com/questions/22426/inconsistent-microarray-expression-levels-after-normalizing-with-log2
# https://www.reddit.com/r/bioinformatics/comments/1ejs94m/log2_transformation_and_quantile_normalization/
# https://www.statology.org/cohens-d-in-r/
# https://stackoverflow.com/questions/10688137/how-to-fix-spaces-in-column-names-of-a-data-frame-remove-spaces-inject-dots
# https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use   
# https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
# https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
# https://stackoverflow.com/questions/26574054/how-to-change-font-size-of-the-correlation-coefficient-in-corrplot
# https://stackoverflow.com/questions/9543343/plot-a-jpg-image-using-base-graphics-in-r
# https://scales.arabpsychology.com/stats/how-to-remove-the-last-character-from-a-string-in-r-2-examples/
# https://ggplot2.tidyverse.org/reference/geom_smooth.html
# https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
# https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
# https://r-graph-gallery.com/network.html)
# https://www.geeksforgeeks.org/elementwise-matrix-multiplication-in-r/
# https://r-graph-gallery.com/249-igraph-network-map-a-color.html
# https://rdrr.io/cran/mlma/man/data.org.html 
# https://cran.r-project.org/web/packages/mma/mma.pdf
# https://www.statology.org/glm-vs-lm-in-r/
# https://intro2r.com/loops.html 
# https://www.benjaminbell.co.uk/2022/12/loops-in-r-nested-loops.html 
# https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
# https://topepo.github.io/caret/parallel-processing.html 
# https://bookdown.org/content/b472c7b3-ede5-40f0-9677-75c3704c7e5c/more-than-one-mediator.html
# https://ds-pl-r-book.netlify.app/optimization-in-r.html
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot 
# https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
# https://statisticsglobe.com/change-font-size-corrplot-r
# https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html 
# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model
# https://github.com/MarioniLab/miloR
# https://www.nature.com/articles/s41467-023-40458-9/figures/4
# https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct 
# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
# https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
# https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
# https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
# https://www.tutorialspoint.com/how-to-extract-strings-that-contains-a-particular-substring-in-an-r-vector
# https://stackoverflow.com/questions/13602170/how-do-i-find-the-difference-between-two-values-without-knowing-which-is-larger
# https://www.geeksforgeeks.org/difference-between-two-vectors-in-r/